deepeval 3.6.1__py3-none-any.whl → 3.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepeval/_version.py CHANGED
@@ -1 +1 @@
- __version__: str = "3.6.1"
+ __version__: str = "3.6.3"
deepeval/confident/api.py CHANGED
@@ -90,6 +90,7 @@ class Endpoints(Enum):
  TRACES_ENDPOINT = "/v1/traces"
  ANNOTATIONS_ENDPOINT = "/v1/annotations"
  PROMPTS_VERSION_ID_ENDPOINT = "/v1/prompts/:alias/versions/:versionId"
+ PROMPTS_LABEL_ENDPOINT = "/v1/prompts/:alias/labels/:label"
  PROMPTS_ENDPOINT = "/v1/prompts"
  PROMPTS_VERSIONS_ENDPOINT = "/v1/prompts/:alias/versions"
  SIMULATE_ENDPOINT = "/v1/simulate"
deepeval/evaluate/execute.py CHANGED
@@ -836,7 +836,13 @@ def execute_agentic_test_cases(
  ):
  if asyncio.iscoroutinefunction(observed_callback):
  loop = get_or_create_event_loop()
- loop.run_until_complete(observed_callback(golden.input))
+ coro = observed_callback(golden.input)
+ loop.run_until_complete(
+ asyncio.wait_for(
+ coro,
+ timeout=settings.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS,
+ )
+ )
  else:
  observed_callback(golden.input)
  current_trace: Trace = current_trace_context.get()
@@ -1190,7 +1196,10 @@ async def _a_execute_agentic_test_case(
  _pbar_callback_id=pbar_tags_id,
  ):
  if asyncio.iscoroutinefunction(observed_callback):
- await observed_callback(golden.input)
+ await asyncio.wait_for(
+ observed_callback(golden.input),
+ timeout=settings.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS,
+ )
  else:
  observed_callback(golden.input)
  current_trace: Trace = current_trace_context.get()
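Both hunks above bound the user's observed callback with asyncio.wait_for so a hung task cannot stall the whole evaluation run. A minimal sketch of the same pattern, with a hard-coded timeout standing in for settings.DEEPEVAL_PER_TASK_TIMEOUT_SECONDS:

    import asyncio

    async def observed_callback(user_input: str) -> str:
        await asyncio.sleep(0.1)  # stand-in for an agent or LLM call
        return f"echo: {user_input}"

    async def run_one(golden_input: str, timeout_seconds: float = 30.0) -> str:
        # wait_for cancels the coroutine and raises asyncio.TimeoutError
        # once the per-task budget is exceeded
        return await asyncio.wait_for(observed_callback(golden_input), timeout=timeout_seconds)

    print(asyncio.run(run_one("hello")))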
deepeval/metrics/g_eval/g_eval.py CHANGED
@@ -97,7 +97,8 @@ class GEval(BaseMetric):
  test_case, _additional_context=_additional_context
  )
  self.score = (
- float(g_score) / self.score_range_span
+ (float(g_score) - self.score_range[0])
+ / self.score_range_span
  if not self.strict_mode
  else int(g_score)
  )
@@ -140,7 +141,7 @@ class GEval(BaseMetric):
  test_case, _additional_context=_additional_context
  )
  self.score = (
- float(g_score) / self.score_range_span
+ (float(g_score) - self.score_range[0]) / self.score_range_span
  if not self.strict_mode
  else int(g_score)
  )
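The GEval change subtracts the lower bound of score_range before dividing, so custom ranges that do not start at 0 normalize into [0, 1]. A quick illustration, assuming score_range_span is score_range[1] - score_range[0]:

    score_range = (1, 5)                  # illustrative custom range
    score_range_span = score_range[1] - score_range[0]
    g_score = 5                           # best possible raw score

    old = float(g_score) / score_range_span                      # 1.25, overflows past 1.0
    new = (float(g_score) - score_range[0]) / score_range_span   # 1.0, correctly normalized
    print(old, new)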
deepeval/metrics/hallucination/hallucination.py CHANGED
@@ -30,7 +30,7 @@ class HallucinationMetric(BaseMetric):
  threshold: float = 0.5,
  model: Optional[Union[str, DeepEvalBaseLLM]] = None,
  include_reason: bool = True,
- async_mode: bool = False,
+ async_mode: bool = True,
  strict_mode: bool = False,
  verbose_mode: bool = False,
  evaluation_template: Type[
deepeval/metrics/tool_correctness/tool_correctness.py CHANGED
@@ -1,4 +1,4 @@
- from typing import List, Union, Dict
+ from typing import List, Dict

  from deepeval.metrics.indicator import metric_progress_indicator
  from deepeval.metrics.utils import (
@@ -299,7 +299,7 @@ class ToolCorrectnessMetric(BaseMetric):
  def is_successful(self) -> bool:
  try:
  self.success = self.score >= self.threshold
- except:
+ except (AttributeError, TypeError):
  self.success = False
  return self.success
deepeval/models/llms/amazon_bedrock_model.py CHANGED
@@ -9,7 +9,7 @@ from deepeval.models.retry_policy import (
  sdk_retries_for,
  )
  from deepeval.models import DeepEvalBaseLLM
- from deepeval.models.llms.utils import trim_and_load_json
+ from deepeval.models.llms.utils import trim_and_load_json, safe_asyncio_run
  from deepeval.constants import ProviderSlug as PS

  # check aiobotocore availability
@@ -40,7 +40,6 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
  region_name: str,
  aws_access_key_id: Optional[str] = None,
  aws_secret_access_key: Optional[str] = None,
- temperature: float = 0,
  input_token_cost: float = 0,
  output_token_cost: float = 0,
  generation_kwargs: Optional[Dict] = None,
@@ -53,13 +52,9 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
  self.region_name = region_name
  self.aws_access_key_id = aws_access_key_id
  self.aws_secret_access_key = aws_secret_access_key
- self.temperature = temperature
  self.input_token_cost = input_token_cost
  self.output_token_cost = output_token_cost

- if self.temperature < 0:
- raise ValueError("Temperature must be >= 0.")
-
  # prepare aiobotocore session, config, and async exit stack
  self._session = get_session()
  self._exit_stack = AsyncExitStack()
@@ -75,7 +70,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
  def generate(
  self, prompt: str, schema: Optional[BaseModel] = None
  ) -> Tuple[Union[str, Dict], float]:
- return asyncio.run(self.a_generate(prompt, schema))
+ return safe_asyncio_run(self.a_generate(prompt, schema))

  @retry_bedrock
  async def a_generate(
@@ -142,34 +137,11 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
  ###############################################

  def get_converse_request_body(self, prompt: str) -> dict:
- # Inline parameter translation with defaults
- param_mapping = {
- "max_tokens": "maxTokens",
- "top_p": "topP",
- "top_k": "topK",
- "stop_sequences": "stopSequences",
- }
-
- # Start with defaults for required parameters
- translated_kwargs = {
- "maxTokens": self.generation_kwargs.get("max_tokens", 1000),
- "topP": self.generation_kwargs.get("top_p", 0),
- }
-
- # Add any other parameters from generation_kwargs
- for key, value in self.generation_kwargs.items():
- if key not in [
- "max_tokens",
- "top_p",
- ]: # Skip already handled defaults
- aws_key = param_mapping.get(key, key)
- translated_kwargs[aws_key] = value

  return {
  "messages": [{"role": "user", "content": [{"text": prompt}]}],
  "inferenceConfig": {
- "temperature": self.temperature,
- **translated_kwargs,
+ **self.generation_kwargs,
  },
  }
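With the translation layer removed, 3.6.3 forwards generation_kwargs verbatim as the Converse inferenceConfig, and temperature is no longer a dedicated constructor argument. A sketch of the resulting request body; the camelCase keys follow the Bedrock Converse API and the values are illustrative:

    generation_kwargs = {"temperature": 0, "maxTokens": 1000, "topP": 0.9}

    request_body = {
        "messages": [{"role": "user", "content": [{"text": "Hello"}]}],
        "inferenceConfig": {**generation_kwargs},
    }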
deepeval/models/llms/openai_model.py CHANGED
@@ -204,7 +204,6 @@ models_requiring_temperature_1 = [
  "gpt-5-mini-2025-08-07",
  "gpt-5-nano",
  "gpt-5-nano-2025-08-07",
- "gpt-5-chat-latest",
  ]
deepeval/models/llms/utils.py CHANGED
@@ -1,6 +1,7 @@
  from typing import Dict
  import re
  import json
+ import asyncio


  def trim_and_load_json(
@@ -20,3 +21,24 @@ def trim_and_load_json(
  raise ValueError(error_str)
  except Exception as e:
  raise Exception(f"An unexpected error occurred: {str(e)}")
+
+
+ def safe_asyncio_run(coro):
+ """
+ Run an async coroutine safely.
+ Falls back to run_until_complete if already in a running event loop.
+ """
+ try:
+ return asyncio.run(coro)
+ except RuntimeError:
+ try:
+ loop = asyncio.get_event_loop()
+ if loop.is_running():
+ future = asyncio.ensure_future(coro)
+ return loop.run_until_complete(future)
+ else:
+ return loop.run_until_complete(coro)
+ except Exception as inner_e:
+ raise
+ except Exception as e:
+ raise
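safe_asyncio_run is the new synchronous entry point used by generate(): it tries asyncio.run first and falls back to the current event loop on RuntimeError. A minimal usage sketch from plain synchronous code (the stub coroutine is illustrative):

    import asyncio
    from deepeval.models.llms.utils import safe_asyncio_run  # added in 3.6.3

    async def a_generate_stub(prompt: str) -> str:
        await asyncio.sleep(0)  # stand-in for an async Bedrock call
        return f"response to {prompt!r}"

    # No loop is running here, so this takes the asyncio.run() path.
    print(safe_asyncio_run(a_generate_stub("hi")))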
deepeval/prompt/api.py CHANGED
@@ -45,6 +45,8 @@ class PromptVersionsHttpResponse(BaseModel):

  class PromptHttpResponse(BaseModel):
  id: str
+ version: str
+ label: Optional[str] = None
  text: Optional[str] = None
  messages: Optional[List[PromptMessage]] = None
  interpolation_type: PromptInterpolationType = Field(
deepeval/prompt/prompt.py CHANGED
@@ -1,5 +1,5 @@
  from enum import Enum
- from typing import Optional, List, Dict
+ from typing import Literal, Optional, List, Dict
  from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
  from rich.console import Console
  import time
@@ -7,6 +7,7 @@ import json
  import os
  from pydantic import BaseModel
  import asyncio
+ import portalocker

  from deepeval.prompt.api import (
  PromptHttpResponse,
@@ -25,6 +26,8 @@ from deepeval.utils import (
  )

  CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"
+ VERSION_CACHE_KEY = "version"
+ LABEL_CACHE_KEY = "label"


  class CustomEncoder(json.JSONEncoder):
@@ -39,6 +42,7 @@ class CustomEncoder(json.JSONEncoder):
  class CachedPrompt(BaseModel):
  alias: str
  version: str
+ label: Optional[str] = None
  template: Optional[str]
  messages_template: Optional[List[PromptMessage]]
  prompt_version_id: str
@@ -50,6 +54,7 @@ class CachedPrompt(BaseModel):


  class Prompt:
+ label: Optional[str] = None
  _prompt_version_id: Optional[str] = None
  _type: Optional[PromptType] = None
  _interpolation_type: Optional[PromptInterpolationType] = None
@@ -73,8 +78,8 @@ class Prompt:
  self._text_template = template
  self._messages_template = messages_template
  self._version = None
- self._polling_tasks: Dict[str, asyncio.Task] = {}
- self._refresh_map: Dict[str, int] = {}
+ self._polling_tasks: Dict[str, Dict[str, asyncio.Task]] = {}
+ self._refresh_map: Dict[str, Dict[str, int]] = {}
  if template:
  self._type = PromptType.TEXT
  elif messages_template:
@@ -138,87 +143,173 @@ class Prompt:
  return versions.text_versions or versions.messages_versions or []

  def _read_from_cache(
- self, alias: str, version: Optional[str] = None
+ self,
+ alias: str,
+ version: Optional[str] = None,
+ label: Optional[str] = None,
  ) -> Optional[CachedPrompt]:
  if not os.path.exists(CACHE_FILE_NAME):
- raise Exception("No Prompt cache file found")
+ return None

  try:
- with open(CACHE_FILE_NAME, "r") as f:
+ # Use shared lock for reading to allow concurrent reads
+ with portalocker.Lock(
+ CACHE_FILE_NAME,
+ mode="r",
+ flags=portalocker.LOCK_SH | portalocker.LOCK_NB,
+ ) as f:
  cache_data = json.load(f)

  if alias in cache_data:
  if version:
- if version in cache_data[alias]:
- return CachedPrompt(**cache_data[alias][version])
- else:
- raise Exception(
- f"Unable to find Prompt version: '{version}' for alias: '{alias}' in cache"
+ if (
+ VERSION_CACHE_KEY in cache_data[alias]
+ and version in cache_data[alias][VERSION_CACHE_KEY]
+ ):
+ return CachedPrompt(
+ **cache_data[alias][VERSION_CACHE_KEY][version]
  )
- else:
- raise Exception(
- f"Unable to load Prompt with alias: '{alias}' from cache when no version is specified "
- )
- else:
- raise Exception(
- f"Unable to find Prompt with alias: '{alias}' in cache"
- )
- except Exception as e:
- raise Exception(f"Error reading Prompt cache from disk: {e}")
+ elif label:
+ if (
+ LABEL_CACHE_KEY in cache_data[alias]
+ and label in cache_data[alias][LABEL_CACHE_KEY]
+ ):
+ return CachedPrompt(
+ **cache_data[alias][LABEL_CACHE_KEY][label]
+ )
+ return None
+ except (portalocker.exceptions.LockException, Exception):
+ # If cache is locked, corrupted or unreadable, return None and let it fetch from API
+ return None

  def _write_to_cache(
  self,
- version: Optional[str] = None,
+ cache_key: Literal[VERSION_CACHE_KEY, LABEL_CACHE_KEY],
+ version: str,
+ label: Optional[str] = None,
  text_template: Optional[str] = None,
  messages_template: Optional[List[PromptMessage]] = None,
  prompt_version_id: Optional[str] = None,
  type: Optional[PromptType] = None,
  interpolation_type: Optional[PromptInterpolationType] = None,
  ):
- if not self.alias or not version:
+ if not self.alias:
  return

- cache_data = {}
- if os.path.exists(CACHE_FILE_NAME):
- try:
- with open(CACHE_FILE_NAME, "r") as f:
- cache_data = json.load(f)
- except Exception:
- cache_data = {}
-
- # Ensure the cache structure is initialized properly
- if self.alias not in cache_data:
- cache_data[self.alias] = {}
-
- # Cache the prompt
- cache_data[self.alias][version] = {
- "alias": self.alias,
- "version": version,
- "template": text_template,
- "messages_template": messages_template,
- "prompt_version_id": prompt_version_id,
- "type": type,
- "interpolation_type": interpolation_type,
- }
-
  # Ensure directory exists
  os.makedirs(HIDDEN_DIR, exist_ok=True)

- # Write back to cache file
- with open(CACHE_FILE_NAME, "w") as f:
- json.dump(cache_data, f, cls=CustomEncoder)
+ try:
+ # Use r+ mode if file exists, w mode if it doesn't
+ mode = "r+" if os.path.exists(CACHE_FILE_NAME) else "w"
+
+ with portalocker.Lock(
+ CACHE_FILE_NAME,
+ mode=mode,
+ flags=portalocker.LOCK_EX,
+ ) as f:
+ # Read existing cache data if file exists and has content
+ cache_data = {}
+ if mode == "r+":
+ try:
+ f.seek(0)
+ content = f.read()
+ if content:
+ cache_data = json.loads(content)
+ except (json.JSONDecodeError, Exception):
+ cache_data = {}
+
+ # Ensure the cache structure is initialized properly
+ if self.alias not in cache_data:
+ cache_data[self.alias] = {}
+
+ if cache_key not in cache_data[self.alias]:
+ cache_data[self.alias][cache_key] = {}
+
+ # Cache the prompt
+ cached_entry = {
+ "alias": self.alias,
+ "version": version,
+ "label": label,
+ "template": text_template,
+ "messages_template": messages_template,
+ "prompt_version_id": prompt_version_id,
+ "type": type,
+ "interpolation_type": interpolation_type,
+ }
+
+ if cache_key == VERSION_CACHE_KEY:
+ cache_data[self.alias][cache_key][version] = cached_entry
+ else:
+ cache_data[self.alias][cache_key][label] = cached_entry
+
+ # Write back to cache file
+ f.seek(0)
+ f.truncate()
+ json.dump(cache_data, f, cls=CustomEncoder)
+ except portalocker.exceptions.LockException:
+ # If we can't acquire the lock, silently skip caching
+ pass
+ except Exception:
+ # If any other error occurs during caching, silently skip
+ pass
+
+ def _load_from_cache_with_progress(
+ self,
+ progress: Progress,
+ task_id: int,
+ start_time: float,
+ version: Optional[str] = None,
+ label: Optional[str] = None,
+ ):
+ """
+ Load prompt from cache and update progress bar.
+ Raises if unable to load from cache.
+ """
+ cached_prompt = self._read_from_cache(
+ self.alias, version=version, label=label
+ )
+ if not cached_prompt:
+ raise ValueError("Unable to fetch prompt and load from cache")
+
+ self.version = cached_prompt.version
+ self.label = cached_prompt.label
+ self._text_template = cached_prompt.template
+ self._messages_template = cached_prompt.messages_template
+ self._prompt_version_id = cached_prompt.prompt_version_id
+ self._type = PromptType(cached_prompt.type)
+ self._interpolation_type = PromptInterpolationType(
+ cached_prompt.interpolation_type
+ )
+
+ end_time = time.perf_counter()
+ time_taken = format(end_time - start_time, ".2f")
+ progress.update(
+ task_id,
+ description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Loaded from cache! ({time_taken}s)",
+ )

  def pull(
  self,
  version: Optional[str] = None,
+ label: Optional[str] = None,
  fallback_to_cache: bool = True,
  write_to_cache: bool = True,
  default_to_cache: bool = True,
  refresh: Optional[int] = 60,
  ):
+ should_write_on_first_fetch = False
  if refresh:
  default_to_cache = True
- write_to_cache = False
+ # Check if we need to bootstrap the cache
+ cached_prompt = self._read_from_cache(
+ self.alias, version=version, label=label
+ )
+ if cached_prompt is None:
+ # No cache exists, so we should write after fetching to bootstrap
+ should_write_on_first_fetch = True
+ write_to_cache = False # Polling will handle subsequent writes
+
  if self.alias is None:
  raise TypeError(
  "Unable to pull prompt from Confident AI when no alias is provided."
@@ -227,15 +318,20 @@ class Prompt:
  # Manage background prompt polling
  loop = get_or_create_general_event_loop()
  if loop.is_running():
- loop.create_task(self.create_polling_task(version, refresh))
+ loop.create_task(self.create_polling_task(version, label, refresh))
  else:
- loop.run_until_complete(self.create_polling_task(version, refresh))
+ loop.run_until_complete(
+ self.create_polling_task(version, label, refresh)
+ )

  if default_to_cache:
  try:
- cached_prompt = self._read_from_cache(self.alias, version)
+ cached_prompt = self._read_from_cache(
+ self.alias, version=version, label=label
+ )
  if cached_prompt:
  self.version = cached_prompt.version
+ self.label = cached_prompt.label
  self._text_template = cached_prompt.template
  self._messages_template = cached_prompt.messages_template
  self._prompt_version_id = cached_prompt.prompt_version_id
@@ -254,58 +350,60 @@ class Prompt:
  TextColumn("[progress.description]{task.description}"),
  transient=False,
  ) as progress:
+ HINT_TEXT = (
+ f"version='{version or 'latest'}'"
+ if not label
+ else f"label='{label}'"
+ )
  task_id = progress.add_task(
- f"Pulling [rgb(106,0,255)]'{self.alias}' (version='{version or 'latest'}')[/rgb(106,0,255)] from Confident AI...",
+ f"Pulling [rgb(106,0,255)]'{self.alias}' ({HINT_TEXT})[/rgb(106,0,255)] from Confident AI...",
  total=100,
  )
+
  start_time = time.perf_counter()
  try:
- data, _ = api.send_request(
- method=HttpMethods.GET,
- endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
- url_params={
- "alias": self.alias,
- "versionId": version or "latest",
- },
- )
+ if label:
+ data, _ = api.send_request(
+ method=HttpMethods.GET,
+ endpoint=Endpoints.PROMPTS_LABEL_ENDPOINT,
+ url_params={
+ "alias": self.alias,
+ "label": label,
+ },
+ )
+ else:
+ data, _ = api.send_request(
+ method=HttpMethods.GET,
+ endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+ url_params={
+ "alias": self.alias,
+ "versionId": version or "latest",
+ },
+ )
+
  response = PromptHttpResponse(
  id=data["id"],
+ version=data.get("version", None),
+ label=data.get("label", None),
  text=data.get("text", None),
  messages=data.get("messages", None),
  type=data["type"],
  interpolation_type=data["interpolationType"],
  )
- except:
- try:
- if fallback_to_cache:
- cached_prompt = self._read_from_cache(
- self.alias, version
- )
- if cached_prompt:
- self.version = cached_prompt.version
- self._text_template = cached_prompt.template
- self._messages_template = (
- cached_prompt.messages_template
- )
- self._prompt_version_id = (
- cached_prompt.prompt_version_id
- )
- self._type = PromptType(cached_prompt.type)
- self._interpolation_type = PromptInterpolationType(
- cached_prompt.interpolation_type
- )
-
- end_time = time.perf_counter()
- time_taken = format(end_time - start_time, ".2f")
- progress.update(
- task_id,
- description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Loaded from cache! ({time_taken}s)",
- )
- return
- except:
- raise
-
- self.version = version or "latest"
+ except Exception:
+ if fallback_to_cache:
+ self._load_from_cache_with_progress(
+ progress,
+ task_id,
+ start_time,
+ version=version,
+ label=label,
+ )
+ return
+ raise
+
+ self.version = response.version
+ self.label = response.label
  self._text_template = response.text
  self._messages_template = response.messages
  self._prompt_version_id = response.id
@@ -318,9 +416,12 @@ class Prompt:
  task_id,
  description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Done! ({time_taken}s)",
  )
- if write_to_cache:
+ # Write to cache if explicitly requested OR if we need to bootstrap cache for refresh mode
+ if write_to_cache or should_write_on_first_fetch:
  self._write_to_cache(
- version=version or "latest",
+ cache_key=LABEL_CACHE_KEY if label else VERSION_CACHE_KEY,
+ version=response.version,
+ label=response.label,
  text_template=response.text,
  messages_template=response.messages,
  prompt_version_id=response.id,
@@ -380,55 +481,114 @@ class Prompt:
  async def create_polling_task(
  self,
  version: Optional[str],
+ label: Optional[str],
  refresh: Optional[int] = 60,
+ default_to_cache: bool = True,
  ):
- if version is None:
+ if version is None and label is None:
  return

  # If polling task doesn't exist, start it
- polling_task: Optional[asyncio.Task] = self._polling_tasks.get(version)
+ CACHE_KEY = LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
+ cache_value = label if label else version
+
+ # Initialize nested dicts if they don't exist
+ if CACHE_KEY not in self._polling_tasks:
+ self._polling_tasks[CACHE_KEY] = {}
+ if CACHE_KEY not in self._refresh_map:
+ self._refresh_map[CACHE_KEY] = {}
+
+ polling_task: Optional[asyncio.Task] = self._polling_tasks[
+ CACHE_KEY
+ ].get(cache_value)
+
  if refresh:
- self._refresh_map[version] = refresh
+ self._refresh_map[CACHE_KEY][cache_value] = refresh
  if not polling_task:
- self._polling_tasks[version] = asyncio.create_task(
- self.poll(version)
+ self._polling_tasks[CACHE_KEY][cache_value] = (
+ asyncio.create_task(
+ self.poll(version, label, default_to_cache)
+ )
  )

  # If invalid `refresh`, stop the task
  else:
  if polling_task:
  polling_task.cancel()
- self._polling_tasks.pop(version)
- self._refresh_map.pop(version)
+ if cache_value in self._polling_tasks[CACHE_KEY]:
+ self._polling_tasks[CACHE_KEY].pop(cache_value)
+ if cache_value in self._refresh_map[CACHE_KEY]:
+ self._refresh_map[CACHE_KEY].pop(cache_value)

- async def poll(self, version: Optional[str] = None):
- api = Api()
+ async def poll(
+ self,
+ version: Optional[str] = None,
+ label: Optional[str] = None,
+ default_to_cache: bool = True,
+ ):
  while True:
- try:
- data, _ = api.send_request(
- method=HttpMethods.GET,
- endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
- url_params={
- "alias": self.alias,
- "versionId": version or "latest",
- },
+ if default_to_cache:
+ cached_prompt = self._read_from_cache(
+ self.alias, version=version, label=label
  )
+ if cached_prompt:
+ self.version = cached_prompt.version
+ self.label = cached_prompt.label
+ self._text_template = cached_prompt.template
+ self._messages_template = cached_prompt.messages_template
+ self._prompt_version_id = cached_prompt.prompt_version_id
+ self._type = PromptType(cached_prompt.type)
+ self._interpolation_type = PromptInterpolationType(
+ cached_prompt.interpolation_type
+ )
+ return
+
+ api = Api()
+ try:
+ if label:
+ data, _ = api.send_request(
+ method=HttpMethods.GET,
+ endpoint=Endpoints.PROMPTS_LABEL_ENDPOINT,
+ url_params={
+ "alias": self.alias,
+ "label": label,
+ },
+ )
+ else:
+ data, _ = api.send_request(
+ method=HttpMethods.GET,
+ endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+ url_params={
+ "alias": self.alias,
+ "versionId": version or "latest",
+ },
+ )
+
  response = PromptHttpResponse(
  id=data["id"],
+ version=data.get("version", None),
+ label=data.get("label", None),
  text=data.get("text", None),
  messages=data.get("messages", None),
  type=data["type"],
  interpolation_type=data["interpolationType"],
  )
- self._write_to_cache(
- version=version or "latest",
- text_template=response.text,
- messages_template=response.messages,
- prompt_version_id=response.id,
- type=response.type,
- interpolation_type=response.interpolation_type,
- )
+ if default_to_cache:
+ self._write_to_cache(
+ cache_key=(
+ LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
+ ),
+ version=response.version,
+ label=response.label,
+ text_template=response.text,
+ messages_template=response.messages,
+ prompt_version_id=response.id,
+ type=response.type,
+ interpolation_type=response.interpolation_type,
+ )
  except Exception as e:
  pass

- await asyncio.sleep(self._refresh_map[version])
+ CACHE_KEY = LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
+ cache_value = label if label else version
+ await asyncio.sleep(self._refresh_map[CACHE_KEY][cache_value])
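Taken together, the prompt.py changes let a prompt be pulled either by version or by the new label route, with version and label reported back from the server response. A hedged usage sketch (alias and label values are illustrative):

    from deepeval.prompt import Prompt

    prompt = Prompt(alias="my-prompt")
    prompt.pull(label="production")      # new in 3.6.3: resolves /v1/prompts/:alias/labels/:label
    print(prompt.version, prompt.label)  # populated from the PromptHttpResponse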
deepeval/test_case/llm_test_case.py CHANGED
@@ -1,4 +1,5 @@
  from pydantic import (
+ ConfigDict,
  Field,
  BaseModel,
  model_validator,
@@ -151,6 +152,8 @@ class ToolCall(BaseModel):


  class LLMTestCase(BaseModel):
+ model_config = ConfigDict(extra="ignore")
+
  input: str
  actual_output: Optional[str] = Field(
  default=None,
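The explicit extra="ignore" config makes LLMTestCase drop unexpected keyword arguments rather than keep or reject them. A small check, assuming the usual public import path:

    from deepeval.test_case import LLMTestCase

    tc = LLMTestCase(input="Hi", actual_output="Hello!", some_legacy_field=123)
    print(hasattr(tc, "some_legacy_field"))  # False: unknown kwargs are ignored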
deepeval/tracing/otel/utils.py CHANGED
@@ -1,11 +1,16 @@
+ import json
+
  from typing import List, Optional, Tuple, Any
- from deepeval.tracing.types import Trace, LLMTestCase, ToolCall
- from deepeval.tracing import trace_manager, BaseSpan
  from opentelemetry.sdk.trace.export import ReadableSpan
- import json

+ from deepeval.evaluate.utils import create_api_test_case
+ from deepeval.test_run.api import LLMApiTestCase
+ from deepeval.test_run.test_run import global_test_run_manager
+ from deepeval.tracing.types import Trace, LLMTestCase, ToolCall
+ from deepeval.tracing import trace_manager, BaseSpan
  from deepeval.tracing.utils import make_json_serializable

+
  GEN_AI_OPERATION_NAMES = ["chat", "generate_content", "task_completion"]

@@ -107,12 +112,12 @@ def check_llm_input_from_gen_ai_attributes(
  input = json.loads(span.attributes.get("gen_ai.input.messages"))
  input = _flatten_input(input)

- except Exception as e:
+ except Exception:
  pass
  try:
  output = json.loads(span.attributes.get("gen_ai.output.messages"))
  output = _flatten_input(output)
- except Exception as e:
+ except Exception:
  pass

  if input is None and output is None:
@@ -126,7 +131,7 @@ def check_llm_input_from_gen_ai_attributes(
  and last_event.get("event.name") == "gen_ai.choice"
  ):
  output = last_event
- except Exception as e:
+ except Exception:
  pass

  return input, output
@@ -181,7 +186,7 @@ def _flatten_input(input: list) -> list:
  }
  )
  return result
- except Exception as e:
+ except Exception:
  return input

  return input
@@ -192,7 +197,7 @@ def check_tool_name_from_gen_ai_attributes(span: ReadableSpan) -> Optional[str]:
  gen_ai_tool_name = span.attributes.get("gen_ai.tool.name")
  if gen_ai_tool_name:
  return gen_ai_tool_name
- except Exception as e:
+ except Exception:
  pass

  return None
@@ -205,7 +210,7 @@ def check_tool_input_parameters_from_gen_ai_attributes(
  tool_arguments = span.attributes.get("tool_arguments")
  if tool_arguments:
  return json.loads(tool_arguments)
- except Exception as e:
+ except Exception:
  pass

  return None
@@ -224,7 +229,7 @@ def check_span_type_from_gen_ai_attributes(span: ReadableSpan):

  elif gen_ai_tool_name:
  return "tool"
- except Exception as e:
+ except Exception:
  pass

  return "base"
@@ -235,7 +240,7 @@ def check_model_from_gen_ai_attributes(span: ReadableSpan):
  gen_ai_request_model_name = span.attributes.get("gen_ai.request.model")
  if gen_ai_request_model_name:
  return gen_ai_request_model_name
- except Exception as e:
+ except Exception:
  pass

  return None
@@ -286,7 +291,7 @@ def prepare_trace_llm_test_case(span: ReadableSpan) -> Optional[LLMTestCase]:
  tools_called.append(
  ToolCall.model_validate_json(tool_call_json_str)
  )
- except Exception as e:
+ except Exception:
  pass

  _expected_tools = span.attributes.get(
@@ -299,7 +304,7 @@ def prepare_trace_llm_test_case(span: ReadableSpan) -> Optional[LLMTestCase]:
  expected_tools.append(
  ToolCall.model_validate_json(tool_call_json_str)
  )
- except Exception as e:
+ except Exception:
  pass

  test_case.tools_called = tools_called
@@ -328,12 +333,6 @@ def parse_list_of_strings(context: List[str]) -> List[str]:
  return parsed_context


- from deepeval.evaluate.utils import create_api_test_case
- from deepeval.test_run.api import LLMApiTestCase
- from deepeval.test_run.test_run import global_test_run_manager
- from typing import Optional
-
-
  def post_test_run(traces: List[Trace], test_run_id: Optional[str]):
  # Accept single trace or list of traces
  if isinstance(traces, Trace):
@@ -384,53 +383,70 @@ def post_test_run(traces: List[Trace], test_run_id: Optional[str]):
  # return test_run_manager.post_test_run(test_run) TODO: add after test run with metric collection is implemented


+ def _normalize_pydantic_ai_messages(span: ReadableSpan) -> Optional[list]:
+ try:
+ raw = span.attributes.get("pydantic_ai.all_messages")
+ if not raw:
+ return None
+
+ messages = raw
+ if isinstance(messages, str):
+ messages = json.loads(messages)
+ elif isinstance(messages, tuple):
+ messages = list(messages)
+
+ if isinstance(messages, list):
+ normalized = []
+ for m in messages:
+ if isinstance(m, str):
+ try:
+ m = json.loads(m)
+ except Exception:
+ pass
+ normalized.append(m)
+ return normalized
+ except Exception:
+ pass
+
+ return None
+
+
  def check_pydantic_ai_agent_input_output(
  span: ReadableSpan,
  ) -> Tuple[Optional[Any], Optional[Any]]:
  input_val: Optional[Any] = None
  output_val: Optional[Any] = None

+ # Get normalized messages once
+ normalized = _normalize_pydantic_ai_messages(span)
+
  # Input (pydantic_ai.all_messages) - slice up to and including the first 'user' message
- try:
- raw = span.attributes.get("pydantic_ai.all_messages")
- if raw:
- messages = raw
- if isinstance(messages, str):
- messages = json.loads(messages)
- elif isinstance(messages, tuple):
- messages = list(messages)
-
- if isinstance(messages, list):
- normalized = []
- for m in messages:
- if isinstance(m, str):
- try:
- m = json.loads(m)
- except Exception:
- pass
- normalized.append(m)
-
- first_user_idx = None
- for i, m in enumerate(normalized):
- role = None
- if isinstance(m, dict):
- role = m.get("role") or m.get("author")
- if role == "user":
- first_user_idx = i
- break
-
- input_val = (
- normalized
- if first_user_idx is None
- else normalized[: first_user_idx + 1]
- )
- except Exception:
- pass
+ if normalized:
+ try:
+ first_user_idx = None
+ for i, m in enumerate(normalized):
+ role = None
+ if isinstance(m, dict):
+ role = m.get("role") or m.get("author")
+ if role == "user":
+ first_user_idx = i
+ break
+
+ input_val = (
+ normalized
+ if first_user_idx is None
+ else normalized[: first_user_idx + 1]
+ )
+ except Exception:
+ pass

  # Output (agent final_result)
  try:
  if span.attributes.get("confident.span.type") == "agent":
  output_val = span.attributes.get("final_result")
+ if not output_val and normalized:
+ # Extract the last message if no final_result is available
+ output_val = normalized[-1]
  except Exception:
  pass

@@ -442,7 +458,7 @@ def check_pydantic_ai_agent_input_output(
  def check_tool_output(span: ReadableSpan):
  try:
  return span.attributes.get("tool_response")
- except Exception as e:
+ except Exception:
  pass
  return None
deepeval/tracing/tracing.py CHANGED
@@ -208,7 +208,13 @@ class TraceManager:
  else:
  # print(f"Ending trace: {trace.root_spans}")
  self.environment = Environment.TESTING
- trace.root_spans = [trace.root_spans[0].children[0]]
+ if (
+ trace.root_spans
+ and len(trace.root_spans) > 0
+ and trace.root_spans[0].children
+ and len(trace.root_spans[0].children) > 0
+ ):
+ trace.root_spans = [trace.root_spans[0].children[0]]
  for root_span in trace.root_spans:
  root_span.parent_uuid = None
deepeval/tracing/utils.py CHANGED
@@ -1,15 +1,12 @@
  import os
- import time
  import inspect
  import json
  import sys
- import difflib
  from datetime import datetime, timezone
  from enum import Enum
  from time import perf_counter
- import time
  from collections import deque
- from typing import Any, Dict, Optional, Sequence, Callable
+ from typing import Any, Dict, Optional

  from deepeval.constants import CONFIDENT_TRACING_ENABLED

@@ -189,8 +186,8 @@ def perf_counter_to_datetime(perf_counter_value: float) -> datetime:
  def replace_self_with_class_name(obj):
  try:
  return f"<{obj.__class__.__name__}>"
- except:
- return f"<self>"
+ except Exception:
+ return "<self>"


  def get_deepeval_trace_mode() -> Optional[str]:
deepeval-3.6.1.dist-info/METADATA → deepeval-3.6.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: deepeval
- Version: 3.6.1
+ Version: 3.6.3
  Summary: The LLM Evaluation Framework
  Home-page: https://github.com/confident-ai/deepeval
  License: Apache-2.0
@@ -359,7 +359,7 @@ for golden in dataset.goldens:

  @pytest.mark.parametrize(
  "test_case",
- dataset,
+ dataset.test_cases,
  )
  def test_customer_chatbot(test_case: LLMTestCase):
  answer_relevancy_metric = AnswerRelevancyMetric(threshold=0.5)
deepeval-3.6.1.dist-info/RECORD → deepeval-3.6.3.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
  deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
- deepeval/_version.py,sha256=60ky4ZrqXl83ooFzPWUHtPFcXD1XP6b9GQDnqw3EHOU,27
+ deepeval/_version.py,sha256=1BsEnmEpD1mtVjCYoXBeguVgrKPAi3TRpS_a7ndu4XU,27
  deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
  deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
  deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
@@ -138,7 +138,7 @@ deepeval/cli/test.py,sha256=kSIFMRTAfVzBJ4OitwvT829-ylV7UzPMP57P2DePS-Q,5482
  deepeval/cli/types.py,sha256=_7KdthstHNc-JKCWrfpDQCf_j8h9PMxh0qJCHmVXJr0,310
  deepeval/cli/utils.py,sha256=F4-yuONzk4ojDoSLjI9RYERB7HOD412iZ2lNlSCq4wk,5601
  deepeval/confident/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
- deepeval/confident/api.py,sha256=bOC71TaVAEgoXFtJ9yMo0-atmUUdBuvaclMGczMcR6o,8455
+ deepeval/confident/api.py,sha256=2ZhrQOtfxcnQSyY6OxrjY17y1yn-NB7pfIiJa20B1Pk,8519
  deepeval/confident/types.py,sha256=-slFhDof_1maMgpLxqDRZv6kz6ZVY2hP_0uj_aveJKU,533
  deepeval/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  deepeval/config/settings.py,sha256=gRRi6nXEUKse13xAShU9MA18zo14vpIgl_R0xJ_0vnM,21314
@@ -159,7 +159,7 @@ deepeval/evaluate/api.py,sha256=rkblH0ZFAAdyuF0Ymh7JE1pIJPR9yFuPrn9SQaCEQp4,435
  deepeval/evaluate/compare.py,sha256=tdSJY4E7YJ_zO3dzvpwngZHLiUI2YQcTWJOLI83htsQ,9855
  deepeval/evaluate/configs.py,sha256=QfWjaWNxLsgEe8-5j4PIs5WcSyEckiWt0qdpXSpl57M,928
  deepeval/evaluate/evaluate.py,sha256=NPAJ2iJqJI_RurXKUIC0tft_ozYMIKwZf5iPfmnNhQc,10412
- deepeval/evaluate/execute.py,sha256=7RCjn2GGcjqK6cp9-0BtHL6PPJNw5-KXqXL60GN3G5Y,88672
+ deepeval/evaluate/execute.py,sha256=XS0XtDGKC1ZOo09lthillfi5aDI5TWFbJ-Y7yICNvGo,89056
  deepeval/evaluate/types.py,sha256=IGZ3Xsj0UecPI3JNeTpJaK1gDvlepokfCmHwtItIW9M,831
  deepeval/evaluate/utils.py,sha256=kkliSGzuICeUsXDtlMMPfN95dUKlqarNhfciSffd4gI,23143
  deepeval/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -240,12 +240,12 @@ deepeval/metrics/faithfulness/faithfulness.py,sha256=bYVhHI7Tr7xH0x-7F2LijxRuCCE
  deepeval/metrics/faithfulness/schema.py,sha256=2dU9dwwmqpGJcWvY2webERWIfH_tn02xgLghHkAY_eM,437
  deepeval/metrics/faithfulness/template.py,sha256=RuZ0LFm4BjZ8lhVrKPgU3ecHszwkF0fe5-BxAkaP5AA,5839
  deepeval/metrics/g_eval/__init__.py,sha256=HAhsQFVq9LIpZXPN00Jc_WrMXrh47NIT86VnUpWM4_4,102
- deepeval/metrics/g_eval/g_eval.py,sha256=JI3rTaEClYgiL9oLaVFh7sunqGoXI7qBeBgi9RkSwDs,14327
+ deepeval/metrics/g_eval/g_eval.py,sha256=CaW7VHPW-SyXt18IE1rSatgagY238s3It-j6SLRI4H4,14395
  deepeval/metrics/g_eval/schema.py,sha256=V629txuDrr_2IEKEsgJVYYZb_pkdfcltQV9ZjvxK5co,287
  deepeval/metrics/g_eval/template.py,sha256=mHj4-mr_HQwbCjpHg7lM_6UesoSatL3g8UGGQAOdT0U,4509
  deepeval/metrics/g_eval/utils.py,sha256=uUT86jRXVYvLDzcnZvvfWssDyGoBHb66nWcJSg4i1u4,8784
  deepeval/metrics/hallucination/__init__.py,sha256=rCVlHi2UGzDKmZKi0esFLafmshVBx2WZ0jiIb-KqcYQ,44
- deepeval/metrics/hallucination/hallucination.py,sha256=tozck1KwqDv80Nd449QH6_6mG15768eCGxmjoBsbzKw,9549
+ deepeval/metrics/hallucination/hallucination.py,sha256=8JN5pj5YWRtl7rgbbFQF6EVBCGm1NV9vaX3_5tScNs4,9548
  deepeval/metrics/hallucination/schema.py,sha256=V8xbrBLMwJfre-lPuDc7rMEdhHf_1hfgoW1jE_ULvAY,286
  deepeval/metrics/hallucination/template.py,sha256=hiss1soxSBFqzOt0KmHZdZUzoQsmXnslDyb8HsjALPs,2620
  deepeval/metrics/indicator.py,sha256=oewo_n5Qet9Zfzo2QQs-EQ8w92siuyDCAmoTZW45ndc,10244
@@ -348,7 +348,7 @@ deepeval/metrics/task_completion/schema.py,sha256=JfnZkbCh7skWvrESy65GEo6Rvo0FDJ
  deepeval/metrics/task_completion/task_completion.py,sha256=RKFkXCVOhO70I8A16zv5BCaV3QVKldNxawJ0T93U_Zc,8978
  deepeval/metrics/task_completion/template.py,sha256=4xjTBcGrPQxInbf8iwJOZyok9SQex1aCkbxKmfkXoA4,10437
  deepeval/metrics/tool_correctness/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- deepeval/metrics/tool_correctness/tool_correctness.py,sha256=8uyNFGM_aGFAB2aCv2CVDg5cjj0OJe8UVDqaT3Gp3kU,12090
+ deepeval/metrics/tool_correctness/tool_correctness.py,sha256=j5wB9mJp7BLbn3bTZd6LlIeub1kXxXGaDVWrzyvBFo4,12111
  deepeval/metrics/toxicity/__init__.py,sha256=1lgt8BKxfBDd7bfSLu_5kMzmsr9b2_ahPK9oq5zLkMk,39
  deepeval/metrics/toxicity/schema.py,sha256=7uUdzXqTvIIz5nfahlllo_fzVRXg7UeMeXn7Hl32pKY,459
  deepeval/metrics/toxicity/template.py,sha256=zl4y4Tg9gXkxKJ8aXVwj0cJ94pvfYuP7MTeV3dvB5yQ,5045
@@ -370,7 +370,7 @@ deepeval/models/embedding_models/ollama_embedding_model.py,sha256=w3etdIdWvYfVIE
  deepeval/models/embedding_models/openai_embedding_model.py,sha256=Z1--e3CnNNmwryqmUMxBCaTURjtgKWHqADuUeCqFlSc,3545
  deepeval/models/hallucination_model.py,sha256=ABi978VKLE_jNHbDzM96kJ08EsZ5ZlvOlJHA_ptSkfQ,1003
  deepeval/models/llms/__init__.py,sha256=qmvv7wnmTDvys2uUTwQRo-_3DlFV3fGLiewPeQYRsAI,670
- deepeval/models/llms/amazon_bedrock_model.py,sha256=xaNV7BnqcsH31ghIKBcacKzetORlFRGHtuBlfr8vbnQ,6183
+ deepeval/models/llms/amazon_bedrock_model.py,sha256=3yiUUGU_d_YK7Usq8v5iqG3yHa5VnqeDOoCLG_p8rtc,5185
  deepeval/models/llms/anthropic_model.py,sha256=5gYRNkYUD7Zl3U0SibBG2YGCQsD6DdTsaBhqdaJlKIw,6072
  deepeval/models/llms/azure_model.py,sha256=dqINcfoJNqdd9zh5iTPwQ_ToGMOF7iH6YUB-UWRSOlc,10730
  deepeval/models/llms/deepseek_model.py,sha256=EqBJkKa7rXppCmlnIt_D-Z_r9fbsOUsOAVvN2jWA-Hk,6404
@@ -380,8 +380,8 @@ deepeval/models/llms/kimi_model.py,sha256=ldTefdSVitZYJJQ-_ZsP87iiT5iZ4QCVdfi-Yz
  deepeval/models/llms/litellm_model.py,sha256=iu4-_JCpd9LdEa-eCWseD2iLTA-r7OSgYGWQ0IxB4eA,11527
  deepeval/models/llms/local_model.py,sha256=hEyKVA6pkQm9dICUKsMNgjVI3w6gnyMdmBt_EylkWDk,4473
  deepeval/models/llms/ollama_model.py,sha256=xPO4d4jMY-cQAyHAcMuFvWS8JMWwCUbKP9CMi838Nuc,3307
- deepeval/models/llms/openai_model.py,sha256=F02N8BgbiEXH7F6y-a6DkjVcBXFEzr87SEB2gVn4xlU,17192
- deepeval/models/llms/utils.py,sha256=ZMZ02kjXAAleq0bIEyjj-gZwe6Gp0b0mK8YMuid2-20,722
+ deepeval/models/llms/openai_model.py,sha256=mUvQ8a9FVk4lrdZyS_QRZTK4imufyaCNjZFPeqbc0AM,17167
+ deepeval/models/llms/utils.py,sha256=gFM_8eIvdSwN_D4Yqp-j7PkfoiRn_bgu7tlCHol3A6c,1324
  deepeval/models/mlllms/__init__.py,sha256=19nN6kUB5XI0nUWUQX0aD9GBUMM8WWGvsDgKjuT4EF4,144
  deepeval/models/mlllms/gemini_model.py,sha256=7tHIWD4w_fBz3L7jkKWygn1QpBPk9nl2Kw-yb0Jc3PI,10167
  deepeval/models/mlllms/ollama_model.py,sha256=_YtYtw8oIMVVI-CFsDicsdeEJUPhw_9ArPxB_1olsJA,4798
@@ -404,8 +404,8 @@ deepeval/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
  deepeval/plugins/plugin.py,sha256=_dwsdx4Dg9DbXxK3f7zJY4QWTJQWc7QE1HmIg2Zjjag,1515
  deepeval/progress_context.py,sha256=ZSKpxrE9sdgt9G3REKnVeXAv7GJXHHVGgLynpG1Pudw,3557
  deepeval/prompt/__init__.py,sha256=M99QTWdxOfiNeySGCSqN873Q80PPxqRvjLq4_Mw-X1w,49
- deepeval/prompt/api.py,sha256=kR3MkaHuU2wYILKVnvnXhQWxWp0XgtcWX-kIjpMJRl8,1728
- deepeval/prompt/prompt.py,sha256=192W5zFBx08nELxRHHDQscMM3psj8OUFV_JR85BZv8Q,15823
+ deepeval/prompt/api.py,sha256=665mLKiq8irXWV8kM9P_qFJipdCYZUNQFwW8AkA3itM,1777
+ deepeval/prompt/prompt.py,sha256=w2BmKtSzXxobjSlBQqUjdAB0Zwe6IYaLjLg7KQvVDXE,21999
  deepeval/prompt/utils.py,sha256=Ermw9P-1-T5wQ5uYuj5yWgdj7pVB_JLw8D37Qvmh9ok,1938
  deepeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  deepeval/red_teaming/README.md,sha256=BY5rAdpp3-sMMToEKwq0Nsd9ivkGDzPE16DeDb8GY7U,154
@@ -434,7 +434,7 @@ deepeval/telemetry.py,sha256=JPZw1VBJ5dGiS8k-dzWs5OhMbNlr65QgVretTy33WCg,21704
  deepeval/test_case/__init__.py,sha256=hLkHxGH0-FFhx4MlJwIbzNHL4pgyLGquh8l0qD-z_cQ,731
  deepeval/test_case/arena_test_case.py,sha256=PcfDxadlc4yW4AEDdvN32AeUpx2Sms1jvnbX31Xu65o,957
  deepeval/test_case/conversational_test_case.py,sha256=lF0V1yCGCInQetggm2wbXx-MkuMRs2ScwqIXCSwb1Fs,7534
- deepeval/test_case/llm_test_case.py,sha256=uWipuFVzKR3gYSpAbjK6GB_6XdtDMIRDNms-LyZYsuc,12117
+ deepeval/test_case/llm_test_case.py,sha256=L-dCvJ4pMPPavZTyN9ZKN30h351DWI_TunmXfHPIjig,12180
  deepeval/test_case/mcp.py,sha256=Z625NLvz0E_UJpbyfyuAi_4nsqKH6DByBf0rfKd70xU,1879
  deepeval/test_case/mllm_test_case.py,sha256=8a0YoE72geX_fLI6yk_cObSxCPddwW-DOb-5OPE1-W8,5414
  deepeval/test_case/utils.py,sha256=5lT7QmhItsQHt44-qQfspuktilcrEyvl2cS0cgUJxds,809
@@ -454,15 +454,15 @@ deepeval/tracing/offline_evals/thread.py,sha256=bcSGFcZJKnszArOLIlWvnCyt0zSmsd7X
  deepeval/tracing/offline_evals/trace.py,sha256=vTflaTKysKRiYvKA-Nx6PUJ3J6NrRLXiIdWieVcm90E,1868
  deepeval/tracing/otel/__init__.py,sha256=HQsaF5yLPwyW5qg8AOV81_nG_7pFHnatOTHi9Wx3HEk,88
  deepeval/tracing/otel/exporter.py,sha256=wPO1ITKpjueLOSNLO6nD2QL9LAd8Xcu6en8hRkB61Wo,28891
- deepeval/tracing/otel/utils.py,sha256=THXOoqLau4w6Jlz0YJV3K3vQcVptxo14hcDQCJiPeks,14821
+ deepeval/tracing/otel/utils.py,sha256=yAXyPvTjax2HdLcvbVv9pyOVW4S7elIp3RLGuBTr_8o,15113
  deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
  deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
- deepeval/tracing/tracing.py,sha256=WFXfGLt58Ia9yCohDZBIUGX6mwieoF8489UziuC-NJI,42458
+ deepeval/tracing/tracing.py,sha256=xZEyuxdGY259nQaDkGp_qO7Avriv8hrf4L15ZfeMNV8,42728
  deepeval/tracing/types.py,sha256=l_utWKerNlE5H3mOKpeUJLsvpP3cMyjH7HRANNgTmSQ,5306
- deepeval/tracing/utils.py,sha256=RUcsDpS_aobK3zuNfZGNvjk7aBbBfHOj3aYu2hRZzg0,7993
+ deepeval/tracing/utils.py,sha256=SLnks8apGlrV6uVnvFVl2mWYABEkvXbPXnQvq3KaU_o,7943
  deepeval/utils.py,sha256=-_o3W892u7naX4Y7a8if4mP0Rtkgtapg6Krr1ZBpj0o,17197
- deepeval-3.6.1.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
- deepeval-3.6.1.dist-info/METADATA,sha256=UrYM0bqzIvhmMlevcqO-Hcbbm2e5r26FwWEzz2rKua8,18743
- deepeval-3.6.1.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
- deepeval-3.6.1.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
- deepeval-3.6.1.dist-info/RECORD,,
+ deepeval-3.6.3.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
+ deepeval-3.6.3.dist-info/METADATA,sha256=BoRZ6BEBPwkypse9Xzw8gRlsezwSrDKsT5RO9C3thQc,18754
+ deepeval-3.6.3.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+ deepeval-3.6.3.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
+ deepeval-3.6.3.dist-info/RECORD,,