deepeval 3.5.0__py3-none-any.whl → 3.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepeval/_version.py CHANGED
@@ -1 +1 @@
- __version__: str = "3.5.0"
+ __version__: str = "3.5.1"
deepeval/confident/api.py CHANGED
@@ -89,7 +89,9 @@ class Endpoints(Enum):
  TEST_RUN_ENDPOINT = "/v1/test-run"
  TRACES_ENDPOINT = "/v1/traces"
  ANNOTATIONS_ENDPOINT = "/v1/annotations"
+ PROMPTS_VERSION_ID_ENDPOINT = "/v1/prompts/:alias/versions/:versionId"
  PROMPTS_ENDPOINT = "/v1/prompts"
+ PROMPTS_VERSIONS_ENDPOINT = "/v1/prompts/:alias/versions"
  SIMULATE_ENDPOINT = "/v1/simulate"
  EVALUATE_ENDPOINT = "/v1/evaluate"
deepeval/integrations/langchain/callback.py CHANGED
@@ -9,6 +9,7 @@ from deepeval.tracing.types import (
  from deepeval.metrics import BaseMetric, TaskCompletionMetric
  from deepeval.test_case import LLMTestCase
  from deepeval.test_run import global_test_run_manager
+ import uuid

  try:
  from langchain_core.callbacks.base import BaseCallbackHandler
@@ -81,6 +82,26 @@ class CallbackHandler(BaseCallbackHandler):
  )
  super().__init__()

+ def on_llm_new_token(
+ self,
+ token: str,
+ *,
+ chunk,
+ run_id: UUID,
+ parent_run_id: Optional[UUID] = None,
+ tags: Optional[list[str]] = None,
+ **kwargs: Any,
+ ):
+ llm_span: Optional[LlmSpan] = trace_manager.get_span_by_uuid(
+ str(run_id)
+ )
+ if llm_span is None:
+ return
+ if llm_span.token_intervals is None:
+ llm_span.token_intervals = {perf_counter(): token}
+ else:
+ llm_span.token_intervals[perf_counter()] = token
+
  def check_active_trace_id(self):
  if self.active_trace_id is None:
  self.active_trace_id = trace_manager.start_new_trace().uuid
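The new on_llm_new_token hook above records each streamed token on the active LlmSpan, keyed by a perf_counter() timestamp. A minimal usage sketch, assuming langchain-openai is installed, Confident AI credentials are configured, and CallbackHandler needs no required constructor arguments (model name and prompt are placeholders):

    from langchain_openai import ChatOpenAI
    from deepeval.integrations.langchain.callback import CallbackHandler

    llm = ChatOpenAI(model="gpt-4o-mini")
    # Streaming fires on_llm_new_token once per chunk, so the traced LLM span
    # accumulates {perf_counter_timestamp: token} entries in token_intervals.
    for chunk in llm.stream("Say hello", config={"callbacks": [CallbackHandler()]}):
        print(chunk.content, end="", flush=True)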
deepeval/metrics/__init__.py CHANGED
@@ -69,7 +69,7 @@ __all__ = [
  "ConversationalGEval",
  "DAGMetric",
  "DeepAcyclicGraph",
- "ConversationalDAGMetric"
+ "ConversationalDAGMetric",
  # RAG metrics
  "AnswerRelevancyMetric",
  "FaithfulnessMetric",
deepeval/metrics/answer_relevancy/template.py CHANGED
@@ -34,62 +34,37 @@ JSON:
  @staticmethod
  def generate_verdicts(input: str, statements: str):
  return f"""For the provided list of statements, determine whether each statement is relevant to address the input.
- Please generate a list of JSON with two keys: `verdict` and `reason`.
- The 'verdict' key should STRICTLY be either a 'yes', 'idk' or 'no'. Answer 'yes' if the statement is relevant to addressing the original input, 'no' if the statement is irrelevant, and 'idk' if it is ambiguous (eg., not directly relevant but could be used as a supporting point to address the input).
- The 'reason' is the reason for the verdict.
- Provide a 'reason' ONLY if the answer is 'no' or 'idk'.
- The provided statements are statements made in the actual output.
+ Generate JSON objects with 'verdict' and 'reason' fields.
+ The 'verdict' should be 'yes' (relevant), 'no' (irrelevant), or 'idk' (ambiguous/supporting information).
+ Provide 'reason' ONLY for 'no' or 'idk' verdicts.
+ The statements are from an AI's actual output.

  **
  IMPORTANT: Please make sure to only return in valid and parseable JSON format, with the 'verdicts' key mapping to a list of JSON objects. Ensure all strings are closed appropriately. Repair any invalid JSON before you output it.
- Example input:
- What features does the new laptop have?

- Example:
- Example statements:
- [
- "The new laptop model has a high-resolution Retina display.",
- "It includes a fast-charging battery with up to 12 hours of usage.",
- "Security features include fingerprint authentication and an encrypted SSD.",
- "Every purchase comes with a one-year warranty.",
- "24/7 customer support is included.",
- "Pineapples taste great on pizza.",
- "The laptop is a Dell XPS 13."
- ]
-
- Example JSON:
+ Expected JSON format:
  {{
  "verdicts": [
  {{
  "verdict": "yes"
  }},
- {{
- "verdict": "yes"
- }},
- {{
- "verdict": "yes"
- }},
- {{
- "verdict": "no",
- "reason": "A one-year warranty is a purchase benefit, not a feature of the laptop itself."
- }},
  {{
  "verdict": "no",
- "reason": "Customer support is a service, not a feature of the laptop."
- }},
- {{
- "verdict": "no",
- "reason": "The statement about pineapples on pizza is completely irrelevant to the input, which asks about laptop features."
+ "reason": <explanation_for_irrelevance>
  }},
  {{
  "verdict": "idk",
- "reason": "The statement about the laptop being a Dell XPS 13 is not directly relevant to the input, but could be used as a supporting point to address the input."
+ "reason": <explanation_for_ambiguity>
  }}
  ]
  }}
- ===== END OF EXAMPLE ======

- Since you are going to generate a verdict for each statement, the number of 'verdicts' SHOULD BE STRICTLY EQUAL to the number of `statements`.
+ Generate ONE verdict per statement - number of 'verdicts' MUST equal number of statements.
+ 'verdict' must be STRICTLY 'yes', 'no', or 'idk':
+ - 'yes': statement is relevant to addressing the input
+ - 'no': statement is irrelevant to the input
+ - 'idk': statement is ambiguous (not directly relevant but could be supporting information)
+ Provide 'reason' ONLY for 'no' or 'idk' verdicts.
  **

  Input:
deepeval/metrics/faithfulness/template.py CHANGED
@@ -76,42 +76,31 @@ The 'verdict' key should STRICTLY be either 'yes', 'no', or 'idk', which states
  Provide a 'reason' ONLY if the answer is 'no' or 'idk'.
  The provided claim is drawn from the actual output. Try to provide a correction in the reason using the facts in the retrieval context.

- **
- IMPORTANT: Please make sure to only return in JSON format, with the 'verdicts' key as a list of JSON objects.
- Example retrieval contexts: "Einstein won the Nobel Prize for his discovery of the photoelectric effect. Einstein won the Nobel Prize in 1968. Einstein is a German Scientist."
- Example claims: ["Barack Obama is a caucasian male.", "Zurich is a city in London", "Einstein won the Nobel Prize for the discovery of the photoelectric effect which may have contributed to his fame.", "Einstein won the Nobel Prize in 1969 for his discovery of the photoelectric effect.", "Einstein was a German chef."]
-
- Example:
+ Expected JSON format:
  {{
  "verdicts": [
- {{
- "verdict": "idk",
- "reason": "The claim about Barack Obama is although incorrect, it is not directly addressed in the retrieval context, and so poses no contradiction."
- }},
- {{
- "verdict": "idk",
- "reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context."
- }},
  {{
  "verdict": "yes"
  }},
  {{
  "verdict": "no",
- "reason": "The actual output claims Einstein won the Nobel Prize in 1969, which is untrue as the retrieval context states it is 1968 instead. This contradicts the retrieval context."
+ "reason": <explanation_for_contradiction>
  }},
  {{
- "verdict": "no",
- "reason": "The actual output claims Einstein is a German chef, which is not correct as the retrieval context states he was a German scientist instead. This contradicts the retrieval context."
- }},
+ "verdict": "idk",
+ "reason": <explanation_for_uncertainty>
+ }}
  ]
  }}
- ===== END OF EXAMPLE ======

- The length of 'verdicts' SHOULD BE STRICTLY EQUAL to that of claims.
- You DON'T have to provide a reason if the answer is 'yes'.
- ONLY provide a 'no' answer if the retrieval context DIRECTLY CONTRADICTS the claims. YOU SHOULD NEVER USE YOUR PRIOR KNOWLEDGE IN YOUR JUDGEMENT.
- Claims made using vague, suggestive, speculative language such as 'may have', 'possibility due to', does NOT count as a contradiction.
- Claims that are not backed up by the retrieval context or are not mentioned in it MUST be answered 'idk'.
+ Generate ONE verdict per claim - length of 'verdicts' MUST equal number of claims.
+ No 'reason' needed for 'yes' verdicts.
+ Only use 'no' if retrieval context DIRECTLY CONTRADICTS the claim - never use prior knowledge.
+ Use 'idk' for claims not backed up by context OR factually incorrect but non-contradictory - do not assume your knowledge.
+ Vague/speculative language in claims (e.g. 'may have', 'possibility') does NOT count as contradiction.
+
+ **
+ IMPORTANT: Please make sure to only return in JSON format, with the 'verdicts' key as a list of JSON objects.
  **

  Retrieval Contexts:
@@ -128,13 +117,14 @@ JSON:
  return f"""Below is a list of Contradictions. It is a list of strings explaining why the 'actual output' does not align with the information presented in the 'retrieval context'. Contradictions happen in the 'actual output', NOT the 'retrieval context'.
  Given the faithfulness score, which is a 0-1 score indicating how faithful the `actual output` is to the retrieval context (higher the better), CONCISELY summarize the contradictions to justify the score.

- **
- IMPORTANT: Please make sure to only return in JSON format, with the 'reason' key providing the reason.
- Example JSON:
+ Expected JSON format:
  {{
  "reason": "The score is <faithfulness_score> because <your_reason>."
  }}

+ **
+ IMPORTANT: Please make sure to only return in JSON format, with the 'reason' key providing the reason.
+
  If there are no contradictions, just say something positive with an upbeat encouraging tone (but don't overdo it otherwise it gets annoying).
  Your reason MUST use information in `contradiction` in your reason.
  Be sure in your reason, as if you know what the actual output is from the contradictions.
deepeval/prompt/api.py CHANGED
@@ -1,4 +1,4 @@
- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, Field, AliasChoices
  from enum import Enum
  from typing import List, Optional

@@ -19,9 +19,28 @@ class PromptType(Enum):
  TEXT = "TEXT"
  LIST = "LIST"

+ class PromptVersion(BaseModel):
+ id: str
+ version: str
+ commit_message: str = Field(
+ serialization_alias="commitMessage",
+ validation_alias=AliasChoices("commit_message", "commitMessage")
+ )
+
+ class PromptVersionsHttpResponse(BaseModel):
+ text_versions: Optional[List[PromptVersion]] = Field(
+ None,
+ serialization_alias="textVersions",
+ validation_alias=AliasChoices("text_versions", "textVersions")
+ )
+ messages_versions: Optional[List[PromptVersion]] = Field(
+ None,
+ serialization_alias="messagesVersions",
+ validation_alias=AliasChoices("messages_versions", "messagesVersions")
+ )

  class PromptHttpResponse(BaseModel):
- promptVersionId: str
+ id: str
  text: Optional[str] = None
  messages: Optional[List[PromptMessage]] = None
  interpolation_type: PromptInterpolationType = Field(
@@ -29,7 +48,6 @@ class PromptHttpResponse(BaseModel):
  )
  type: PromptType

-
  class PromptPushRequest(BaseModel):
  alias: str
  text: Optional[str] = None
@@ -44,4 +62,4 @@ class PromptPushRequest(BaseModel):

  class PromptApi(BaseModel):
  id: str
- type: PromptType
+ type: PromptType
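A rough sketch of how the new version-listing models might be parsed; the payload below is invented and only illustrates the camelCase aliases the validators accept (it would come from the /v1/prompts/:alias/versions endpoint added above):

    from deepeval.prompt.api import PromptVersionsHttpResponse

    payload = {  # hypothetical response body
        "textVersions": [
            {"id": "ver_1", "version": "00.00.01", "commitMessage": "initial draft"},
            {"id": "ver_2", "version": "00.00.02", "commitMessage": "tighten wording"},
        ],
        "messagesVersions": None,
    }
    response = PromptVersionsHttpResponse(**payload)
    # Either text_versions or messages_versions is populated, mirroring _get_versions().
    latest = (response.text_versions or response.messages_versions or [])[-1]
    print(latest.version, latest.commit_message)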
deepeval/prompt/prompt.py CHANGED
@@ -1,11 +1,12 @@
  from enum import Enum
- from typing import Optional, List
+ from typing import Optional, List, Dict
  from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
  from rich.console import Console
  import time
  import json
  import os
  from pydantic import BaseModel
+ import asyncio

  from deepeval.prompt.api import (
  PromptHttpResponse,
@@ -13,11 +14,12 @@ from deepeval.prompt.api import (
  PromptType,
  PromptInterpolationType,
  PromptPushRequest,
+ PromptVersionsHttpResponse,
  )
  from deepeval.prompt.utils import interpolate_text
  from deepeval.confident.api import Api, Endpoints, HttpMethods
-
  from deepeval.constants import HIDDEN_DIR
+ from deepeval.utils import get_or_create_event_loop

  CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"

@@ -63,7 +65,23 @@ class Prompt:
  self.alias = alias
  self._text_template = template
  self._messages_template = messages_template
- self.version = None
+ self._version = None
+ self._polling_tasks: Dict[str, asyncio.Task] = {}
+ self._refresh_map: Dict[str, int] = {}
+
+ @property
+ def version(self):
+ if self._version is not None and self._version != "latest":
+ return self._version
+ versions = self._get_versions()
+ if len(versions) == 0:
+ return "latest"
+ else:
+ return versions[-1].version
+
+ @version.setter
+ def version(self, value):
+ self._version = value

  def interpolate(self, **kwargs):
  if self._type == PromptType.TEXT:
@@ -93,6 +111,20 @@ class Prompt:
  return interpolated_messages
  else:
  raise ValueError(f"Unsupported prompt type: {self._type}")
+
+ def _get_versions(self) -> List:
+ if self.alias is None:
+ raise ValueError(
+ "Prompt alias is not set. Please set an alias to continue."
+ )
+ api = Api()
+ data, _ = api.send_request(
+ method=HttpMethods.GET,
+ endpoint=Endpoints.PROMPTS_VERSIONS_ENDPOINT,
+ url_params={"alias": self.alias},
+ )
+ versions = PromptVersionsHttpResponse(**data)
+ return versions.text_versions or versions.messages_versions or []

  def _read_from_cache(
  self, alias: str, version: Optional[str] = None
@@ -123,8 +155,16 @@ class Prompt:
  except Exception as e:
  raise Exception(f"Error reading Prompt cache from disk: {e}")

- def _write_to_cache(self):
- if not self.alias or not self.version:
+ def _write_to_cache(
+ self,
+ version: Optional[str] = None,
+ text_template: Optional[str] = None,
+ messages_template: Optional[List[PromptMessage]] = None,
+ prompt_version_id: Optional[str] = None,
+ type: Optional[PromptType] = None,
+ interpolation_type: Optional[PromptInterpolationType] = None,
+ ):
+ if not self.alias or not version:
  return

  cache_data = {}
@@ -140,14 +180,14 @@ class Prompt:
  cache_data[self.alias] = {}

  # Cache the prompt
- cache_data[self.alias][self.version] = {
+ cache_data[self.alias][version] = {
  "alias": self.alias,
- "version": self.version,
- "template": self._text_template,
- "messages_template": self._messages_template,
- "prompt_version_id": self._prompt_version_id,
- "type": self._type,
- "interpolation_type": self._interpolation_type,
+ "version": version,
+ "template": text_template,
+ "messages_template": messages_template,
+ "prompt_version_id": prompt_version_id,
+ "type": type,
+ "interpolation_type": interpolation_type,
  }

  # Ensure directory exists
@@ -163,12 +203,22 @@ class Prompt:
  fallback_to_cache: bool = True,
  write_to_cache: bool = True,
  default_to_cache: bool = True,
+ refresh: Optional[int] = 60,
  ):
+ if refresh:
+ default_to_cache = True
+ write_to_cache = False
  if self.alias is None:
  raise TypeError(
  "Unable to pull prompt from Confident AI when no alias is provided."
  )

+ # Manage background prompt polling
+ loop = get_or_create_event_loop()
+ loop.run_until_complete(
+ self.create_polling_task(version, refresh)
+ )
+
  if default_to_cache:
  try:
  cached_prompt = self._read_from_cache(self.alias, version)
@@ -200,11 +250,11 @@ class Prompt:
  try:
  data, _ = api.send_request(
  method=HttpMethods.GET,
- endpoint=Endpoints.PROMPTS_ENDPOINT,
- params={"alias": self.alias, "version": version},
+ endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+ url_params={"alias": self.alias, "versionId": version or "latest"},
  )
  response = PromptHttpResponse(
- promptVersionId=data["promptVersionId"],
+ id=data["id"],
  text=data.get("text", None),
  messages=data.get("messages", None),
  type=data["type"],
@@ -243,7 +293,7 @@ class Prompt:
  self.version = version or "latest"
  self._text_template = response.text
  self._messages_template = response.messages
- self._prompt_version_id = response.promptVersionId
+ self._prompt_version_id = response.id
  self._type = response.type
  self._interpolation_type = response.interpolation_type

@@ -254,7 +304,14 @@ class Prompt:
  description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Done! ({time_taken}s)",
  )
  if write_to_cache:
- self._write_to_cache()
+ self._write_to_cache(
+ version=version or "latest",
+ text_template=response.text,
+ messages_template=response.messages,
+ prompt_version_id=response.id,
+ type=response.type,
+ interpolation_type=response.interpolation_type,
+ )

  def push(
  self,
@@ -300,3 +357,60 @@ class Prompt:
  "✅ Prompt successfully pushed to Confident AI! View at "
  f"[link={link}]{link}[/link]"
  )
+
+ ############################################
+ ### Polling
+ ############################################
+
+ async def create_polling_task(
+ self,
+ version: Optional[str],
+ refresh: Optional[int] = 60,
+ ):
+ if version is None:
+ return
+
+ # If polling task doesn't exist, start it
+ polling_task: Optional[asyncio.Task] = self._polling_tasks.get(version)
+ if refresh:
+ self._refresh_map[version] = refresh
+ if not polling_task:
+ self._polling_tasks[version] = asyncio.create_task(
+ self.poll(version)
+ )
+
+ # If invalid `refresh`, stop the task
+ else:
+ if polling_task:
+ polling_task.cancel()
+ self._polling_tasks.pop(version)
+ self._refresh_map.pop(version)
+
+ async def poll(self, version: Optional[str] = None):
+ api = Api()
+ while True:
+ try:
+ data, _ = api.send_request(
+ method=HttpMethods.GET,
+ endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+ url_params={"alias": self.alias, "versionId": version or "latest"},
+ )
+ response = PromptHttpResponse(
+ id=data["id"],
+ text=data.get("text", None),
+ messages=data.get("messages", None),
+ type=data["type"],
+ interpolation_type=data["interpolationType"],
+ )
+ self._write_to_cache(
+ version=version or "latest",
+ text_template=response.text,
+ messages_template=response.messages,
+ prompt_version_id=response.id,
+ type=response.type,
+ interpolation_type=response.interpolation_type,
+ )
+ except Exception as e:
+ pass
+
+ await asyncio.sleep(self._refresh_map[version])
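A minimal usage sketch of the new refresh behaviour in Prompt.pull(); the alias and version values are placeholders and a configured Confident AI API key is assumed:

    from deepeval.prompt.prompt import Prompt

    prompt = Prompt(alias="my-prompt-alias")
    # refresh defaults to 60: pull() prefers the local cache and schedules a
    # background poll that re-fetches and re-caches this version every 60 seconds.
    # Passing a falsy refresh (e.g. refresh=None) cancels an existing polling task.
    prompt.pull(version="00.00.01", refresh=60)
    print(prompt.version)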
deepeval/synthesizer/synthesizer.py CHANGED
@@ -361,7 +361,7 @@ class Synthesizer:
  progress if _progress is None else nullcontext()
  ):

- for i, context in enumerate(contexts):
+ for context_index, context in enumerate(contexts):
  # Calculate pbar lengths
  should_style = (
  self.styling_config.input_format
@@ -381,7 +381,7 @@ class Synthesizer:
  # Add pbars
  pbar_generate_goldens_id = add_pbar(
  progress,
- f"\t⚡ Generating goldens from context #{i}",
+ f"\t⚡ Generating goldens from context #{context_index}",
  total=1 + max_goldens_per_context,
  )
  pbar_generate_inputs_id = add_pbar(
@@ -421,7 +421,9 @@ class Synthesizer:
  progress, pbar_generate_goldens_id, remove=False
  )

- for j, data in enumerate(qualified_synthetic_inputs):
+ for input_index, data in enumerate(
+ qualified_synthetic_inputs
+ ):
  # Evolve input
  evolved_input, evolutions_used = self._evolve_input(
  input=data.input,
@@ -429,7 +431,9 @@ class Synthesizer:
  num_evolutions=self.evolution_config.num_evolutions,
  evolutions=self.evolution_config.evolutions,
  progress=progress,
- pbar_evolve_input_id=pbar_evolve_input_ids[j],
+ pbar_evolve_input_id=pbar_evolve_input_ids[
+ input_index
+ ],
  remove_pbar=False,
  )

@@ -441,7 +445,9 @@ class Synthesizer:
  task=self.styling_config.task,
  )
  update_pbar(
- progress, pbar_evolve_input_ids[j], remove=False
+ progress,
+ pbar_evolve_input_ids[input_index],
+ remove=False,
  )
  res: SyntheticData = self._generate_schema(
  prompt,
@@ -455,15 +461,15 @@ class Synthesizer:
  input=evolved_input,
  context=context,
  source_file=(
- source_files[i]
+ source_files[context_index]
  if source_files is not None
  else None
  ),
  additional_metadata={
  "evolutions": evolutions_used,
- "synthetic_input_quality": scores[j],
+ "synthetic_input_quality": scores[input_index],
  "context_quality": (
- _context_scores[i]
+ _context_scores[context_index]
  if _context_scores is not None
  else None
  ),
@@ -480,7 +486,9 @@ class Synthesizer:
  res = self._generate(prompt)
  golden.expected_output = res
  update_pbar(
- progress, pbar_evolve_input_ids[j], remove=False
+ progress,
+ pbar_evolve_input_ids[input_index],
+ remove=False,
  )

  goldens.append(golden)
deepeval/tracing/api.py CHANGED
@@ -86,6 +86,9 @@ class BaseApiSpan(BaseModel):
  cost_per_output_token: Optional[float] = Field(
  None, alias="costPerOutputToken"
  )
+ token_intervals: Optional[Dict[str, str]] = Field(
+ None, alias="tokenIntervals"
+ )

  ## evals
  metric_collection: Optional[str] = Field(None, alias="metricCollection")
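On the API model both keys and values are strings; after the conversion in TraceManager (see the tracing.py hunk further below), the payload behind the "tokenIntervals" alias would look roughly like this sketch, with invented timestamps and tokens:

    token_intervals = {
        "2025-01-01T12:00:00.000125Z": "Hel",
        "2025-01-01T12:00:00.045871Z": "lo",
    }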
deepeval/tracing/context.py CHANGED
@@ -4,7 +4,6 @@ from contextvars import ContextVar
  from deepeval.tracing.types import BaseSpan, Trace
  from deepeval.test_case.llm_test_case import ToolCall, LLMTestCase
  from deepeval.tracing.types import LlmSpan, RetrieverSpan
- from deepeval.metrics import BaseMetric
  from deepeval.prompt.prompt import Prompt

  current_span_context: ContextVar[Optional[BaseSpan]] = ContextVar(
@@ -117,6 +116,7 @@ def update_llm_span(
  output_token_count: Optional[float] = None,
  cost_per_input_token: Optional[float] = None,
  cost_per_output_token: Optional[float] = None,
+ token_intervals: Optional[Dict[float, str]] = None,
  prompt: Optional[Prompt] = None,
  ):
  current_span = current_span_context.get()
@@ -132,6 +132,8 @@
  current_span.cost_per_input_token = cost_per_input_token
  if cost_per_output_token:
  current_span.cost_per_output_token = cost_per_output_token
+ if token_intervals:
+ current_span.token_intervals = token_intervals
  if prompt:
  current_span.prompt = prompt
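A hedged sketch of passing token_intervals through update_llm_span; it assumes the function is imported from deepeval.tracing.context (the file shown above) and is called while an LLM span is active in the current context:

    from time import perf_counter
    from deepeval.tracing.context import update_llm_span

    def record_stream(token_iterable):
        intervals = {}
        for token in token_iterable:           # e.g. tokens from any streaming client
            intervals[perf_counter()] = token  # float perf_counter key -> token string
        update_llm_span(token_intervals=intervals)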
 
deepeval/tracing/tracing.py CHANGED
@@ -114,7 +114,7 @@ class TraceManager:
  self._print_trace_status(
  message=f"WARNING: Exiting with {queue_size + in_flight} abaonded trace(s).",
  trace_worker_status=TraceWorkerStatus.WARNING,
- description=f"Set {CONFIDENT_TRACE_FLUSH}=YES as an environment variable to flush remaining traces to Confident AI.",
+ description=f"Set {CONFIDENT_TRACE_FLUSH}=1 as an environment variable to flush remaining traces to Confident AI.",
  )

  def mask(self, data: Any):
@@ -314,7 +314,7 @@ class TraceManager:
  env_text,
  message + ":",
  description,
- f"\nTo disable dev logging, set {CONFIDENT_TRACE_VERBOSE}=NO as an environment variable.",
+ f"\nTo disable dev logging, set {CONFIDENT_TRACE_VERBOSE}=0 as an environment variable.",
  )
  else:
  console.print(message_prefix, env_text, message)
@@ -717,6 +717,16 @@ class TraceManager:
  api_span.input_token_count = span.input_token_count
  api_span.output_token_count = span.output_token_count

+ processed_token_intervals = {}
+ if span.token_intervals:
+ for key, value in span.token_intervals.items():
+ time = to_zod_compatible_iso(
+ perf_counter_to_datetime(key),
+ microsecond_precision=True,
+ )
+ processed_token_intervals[time] = value
+ api_span.token_intervals = processed_token_intervals
+
  return api_span
deepeval/tracing/types.py CHANGED
@@ -102,6 +102,9 @@ class LlmSpan(BaseSpan):
  cost_per_output_token: Optional[float] = Field(
  None, serialization_alias="costPerOutputToken"
  )
+ token_intervals: Optional[Dict[float, str]] = Field(
+ None, serialization_alias="tokenTimes"
+ )

  # for serializing `prompt`
  model_config = {"arbitrary_types_allowed": True}
deepeval/tracing/utils.py CHANGED
@@ -100,10 +100,14 @@ def make_json_serializable(obj):
  return _serialize(obj)


- def to_zod_compatible_iso(dt: datetime) -> str:
+ def to_zod_compatible_iso(
+ dt: datetime, microsecond_precision: bool = False
+ ) -> str:
  return (
  dt.astimezone(timezone.utc)
- .isoformat(timespec="milliseconds")
+ .isoformat(
+ timespec="microseconds" if microsecond_precision else "milliseconds"
+ )
  .replace("+00:00", "Z")
  )
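A quick sketch of the precision difference (import path taken from the file above):

    from datetime import datetime, timezone
    from deepeval.tracing.utils import to_zod_compatible_iso

    dt = datetime(2025, 1, 1, 12, 0, 0, 123456, tzinfo=timezone.utc)
    print(to_zod_compatible_iso(dt))                              # 2025-01-01T12:00:00.123Z
    print(to_zod_compatible_iso(dt, microsecond_precision=True))  # 2025-01-01T12:00:00.123456Z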
 
deepeval-3.5.0.dist-info/METADATA → deepeval-3.5.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: deepeval
- Version: 3.5.0
+ Version: 3.5.1
  Summary: The LLM Evaluation Framework
  Home-page: https://github.com/confident-ai/deepeval
  License: Apache-2.0
deepeval-3.5.0.dist-info/RECORD → deepeval-3.5.1.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
  deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
- deepeval/_version.py,sha256=xgoMNdDXsY3c4GfV1_DVK-xGdMOp5KCDaKln5j0PJdY,27
+ deepeval/_version.py,sha256=4-DIaf0_guINnwTWGKRHVcep723FM_T7p_K6jccjok0,27
  deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
  deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
  deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
@@ -138,7 +138,7 @@ deepeval/cli/test.py,sha256=kSIFMRTAfVzBJ4OitwvT829-ylV7UzPMP57P2DePS-Q,5482
  deepeval/cli/types.py,sha256=_7KdthstHNc-JKCWrfpDQCf_j8h9PMxh0qJCHmVXJr0,310
  deepeval/cli/utils.py,sha256=F4-yuONzk4ojDoSLjI9RYERB7HOD412iZ2lNlSCq4wk,5601
  deepeval/confident/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
- deepeval/confident/api.py,sha256=-2i3IBLtj5bUIImwOF6ltGVR3ZyViIbIC38XxwWvf54,8318
+ deepeval/confident/api.py,sha256=bOC71TaVAEgoXFtJ9yMo0-atmUUdBuvaclMGczMcR6o,8455
  deepeval/confident/types.py,sha256=-slFhDof_1maMgpLxqDRZv6kz6ZVY2hP_0uj_aveJKU,533
  deepeval/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  deepeval/config/settings.py,sha256=e7sk6_3I14hG457e75DoJd9Ojo3rOkpBZzsMYlj4gKQ,18139
@@ -172,7 +172,7 @@ deepeval/integrations/hugging_face/rich_manager.py,sha256=WvFtPGpPmGeg2Ftsnojga6
  deepeval/integrations/hugging_face/tests/test_callbacks.py,sha256=88Wyg-aDaXujj9jHeGdFF3ITSl2-y7eaJGWgSyvvDi8,4607
  deepeval/integrations/hugging_face/utils.py,sha256=HUKdQcTIb76Ct69AS737oPxmlVxk5fw2UbT2pLn-o8k,1817
  deepeval/integrations/langchain/__init__.py,sha256=EJz0UvoLjBG5cftOJNJQ5qLawwHHRnSQLgBu_SaqZ1Q,94
- deepeval/integrations/langchain/callback.py,sha256=Cp3t0zPwXYnj5Hs3PhYzFTLmQF7cc3S2eH2vnHwiT8k,15876
+ deepeval/integrations/langchain/callback.py,sha256=hps3eq8rYZIvxbGtCyAxmb0VTTgAX1HqDBxQLGxZYho,16450
  deepeval/integrations/langchain/patch.py,sha256=yWkdGMzRVggBcPFx__HRlUoYtASLh7Vef6mqOIZ9LDY,992
  deepeval/integrations/langchain/utils.py,sha256=gSs4VOIzftVS9VLbQSs94R-Pi7D6CGFt84SzccwOsWg,3209
  deepeval/integrations/llama_index/__init__.py,sha256=zBwUFQXDp6QFtp1cfANy8ucV08rjc93nyxM9o9hWjT0,216
@@ -183,11 +183,11 @@ deepeval/integrations/pydantic_ai/__init__.py,sha256=36fBKBLRo1y5jFlj0Y4xhDJsiq4
  deepeval/integrations/pydantic_ai/otel.py,sha256=2DpO3RapdztXPlT9BWhQfF4dJDMyp2X7YvuplJ0SwC8,1661
  deepeval/integrations/pydantic_ai/patcher.py,sha256=wszU2YROZAQovyz1ZNRvTtsuJ5By_x4SF6yjtmItcNk,12210
  deepeval/key_handler.py,sha256=damdQEBLGy4IVk5DR5-E3blIZdLbcMtyeGAFn_4_SG4,6505
- deepeval/metrics/__init__.py,sha256=xofaK_bJq0QCSerSWYjHYRXXch9YQwZHxIfVAv1G7fo,4012
+ deepeval/metrics/__init__.py,sha256=nvO0Wv2JROjK1I9MDNIFUJlrRAZI2C0xbGYSBZK5q4g,4013
  deepeval/metrics/answer_relevancy/__init__.py,sha256=WbZUpoSg2GQoqJ4VIRirVVQ1JDx5xwT-RskwqNKfWGM,46
  deepeval/metrics/answer_relevancy/answer_relevancy.py,sha256=vlc7BzUAtYVW62d5Qa-fIHSLOX239KFwCE7fCGP8jGE,10935
  deepeval/metrics/answer_relevancy/schema.py,sha256=N8wIBh4qwk4-BZOEyPJM-MB2_0dbkqXHv0aCfsIkROo,405
- deepeval/metrics/answer_relevancy/template.py,sha256=vU6yAsiCYtvx5S1g74WeEdJmuGvd2ZtwDDqM5-jfYkM,5174
+ deepeval/metrics/answer_relevancy/template.py,sha256=InlbD3nufOFmohX3c7tnBwxDbcXwYbc57nPjIiW0Gmc,4030
  deepeval/metrics/arena_g_eval/__init__.py,sha256=pVDIsWD_DLumOLegJrVSozcWwzsaxJXE5cIN7KxCzws,37
  deepeval/metrics/arena_g_eval/arena_g_eval.py,sha256=B4Gjct3w5VGPxmumBblFVajdUIdWJTNR0hvMuhgIFg0,11661
  deepeval/metrics/arena_g_eval/schema.py,sha256=3wipvUpZNO0O4QuWFy1LaLenfTYxLKldCERmP3sVtYI,288
@@ -236,7 +236,7 @@ deepeval/metrics/dag/utils.py,sha256=66D88fpjIUdVwZvYV8a1L9TlX1wvbCVuE6Y8BFTbpkE
  deepeval/metrics/faithfulness/__init__.py,sha256=RffAtTOSdtWO1gHVMnPI-imJahf3JENOoJRiNw-Xv4g,43
  deepeval/metrics/faithfulness/faithfulness.py,sha256=bYVhHI7Tr7xH0x-7F2LijxRuCCEtLOnXLzncvJLVv60,12887
  deepeval/metrics/faithfulness/schema.py,sha256=2dU9dwwmqpGJcWvY2webERWIfH_tn02xgLghHkAY_eM,437
- deepeval/metrics/faithfulness/template.py,sha256=q5NvVBcUEZgyMy_1zHFGtDNU7PoREFJGOkVQbZf8r-g,7117
+ deepeval/metrics/faithfulness/template.py,sha256=RuZ0LFm4BjZ8lhVrKPgU3ecHszwkF0fe5-BxAkaP5AA,5839
  deepeval/metrics/g_eval/__init__.py,sha256=HAhsQFVq9LIpZXPN00Jc_WrMXrh47NIT86VnUpWM4_4,102
  deepeval/metrics/g_eval/g_eval.py,sha256=JI3rTaEClYgiL9oLaVFh7sunqGoXI7qBeBgi9RkSwDs,14327
  deepeval/metrics/g_eval/schema.py,sha256=V629txuDrr_2IEKEsgJVYYZb_pkdfcltQV9ZjvxK5co,287
@@ -402,8 +402,8 @@ deepeval/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
  deepeval/plugins/plugin.py,sha256=_dwsdx4Dg9DbXxK3f7zJY4QWTJQWc7QE1HmIg2Zjjag,1515
  deepeval/progress_context.py,sha256=ZSKpxrE9sdgt9G3REKnVeXAv7GJXHHVGgLynpG1Pudw,3557
  deepeval/prompt/__init__.py,sha256=M99QTWdxOfiNeySGCSqN873Q80PPxqRvjLq4_Mw-X1w,49
- deepeval/prompt/api.py,sha256=VxRxnnCPiTyiIzP4MrpB7dgefgRNk3xOH5Dn5Y1Hk0o,1035
- deepeval/prompt/prompt.py,sha256=nTZ5lkjMj4YgtPOL0Tk4uzCGAEpZN7ityOXsrrm5mFI,11110
+ deepeval/prompt/api.py,sha256=q0CU57eGZkCL3CYIFS6HG-JLyptRhqJRLU9Al8roCRk,1705
+ deepeval/prompt/prompt.py,sha256=XVGFOK4eQfjWKm-N1GtRilWqtjLM3JNmIqeuYxQx6Xk,15170
  deepeval/prompt/utils.py,sha256=Gk0zj_9BK8MQccs8GmiC8o-YVtkou6ZJEz8kWgW5Mog,1678
  deepeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  deepeval/red_teaming/README.md,sha256=BY5rAdpp3-sMMToEKwq0Nsd9ivkGDzPE16DeDb8GY7U,154
@@ -421,7 +421,7 @@ deepeval/synthesizer/chunking/context_generator.py,sha256=0c--WxTiGLMF0l5sgjeWQF
  deepeval/synthesizer/chunking/doc_chunker.py,sha256=5PZnxNDuNCngz3wZWG5QeCINec6cIq1ko1bwaDNhxAI,9416
  deepeval/synthesizer/config.py,sha256=vcSi6upnmd667dAGANTTdPmY0z5sQ8Ctal7Xr4-tbhA,1934
  deepeval/synthesizer/schema.py,sha256=PIv3012VMg_v-Ylwn08-4tNjf4QShBSg-kaCkgtdA88,879
- deepeval/synthesizer/synthesizer.py,sha256=SGH--Xd6VRcnI6F2pP4co8F_8r2CvNtgvbOLEKDOZW8,59709
+ deepeval/synthesizer/synthesizer.py,sha256=yyJQgdoDK4-bc92N7fY1-I5DrnUjQEUTQP0UMmwJoJ0,60045
  deepeval/synthesizer/templates/__init__.py,sha256=C-wSGQeMRxTdSBJbgeyAM5Iu6mkHVSYbNfz0AY9K5Yc,209
  deepeval/synthesizer/templates/template.py,sha256=nTH-k8XbvZQD_lagsaf6kmT6oylFjQ7gEseHYB1Zyso,39807
  deepeval/synthesizer/templates/template_extraction.py,sha256=NkpzP-MkoefokVJBZn9s1ErDvI3o9ocY5_ZY85zj4KE,1972
@@ -443,8 +443,8 @@ deepeval/test_run/hooks.py,sha256=Qnd06bk9RJN4WmFUzJrBAi3Xj261hzyzI2iRmG8wbKw,37
  deepeval/test_run/hyperparameters.py,sha256=f7M07w1EfT8YPtiD9xVIVYa3ZewkxewSkK7knwv0YlY,2289
  deepeval/test_run/test_run.py,sha256=eCo_NESZruIAtSu2feSbz9AtOcu9v92TNiS0OON_i-I,33611
  deepeval/tracing/__init__.py,sha256=OPsA_VmYNLC1M-WYJ37R6SxGyLnoXIkuyMBTcAneeao,530
- deepeval/tracing/api.py,sha256=2e40rVCUSODj_M1lGuzmg9SNxucMK4b0G0lqkG5Buyw,4769
- deepeval/tracing/context.py,sha256=oc7QAUVLGTiMw9oYq5lc_5JoKLzmmAkUnvxvCNyVP1A,5242
+ deepeval/tracing/api.py,sha256=rq4rB5f3tfrv6l4mRJmDrwRj5CH4dyatwxhG7p8xbVk,4867
+ deepeval/tracing/context.py,sha256=mA82v7nXVLdM6tQrul8zt7H_sap-8Nfrm2uCpbT5ffM,5337
  deepeval/tracing/offline_evals/__init__.py,sha256=bEniJAl7PmS9u2ksiOTfHtlCPJ9_CJV5R6umrUOX5MM,102
  deepeval/tracing/offline_evals/api.py,sha256=eBfqh2uWyeRkIeGhjrN1bTQzAEow-XPubs-42WEZ2QQ,510
  deepeval/tracing/offline_evals/span.py,sha256=pXqTVXs-WnjRVpCYYEbNe0zSM6Wz9GsKHsM5ZcWxrmM,1802
@@ -455,12 +455,12 @@ deepeval/tracing/otel/exporter.py,sha256=dXQd834zm5rm1ss9pWkBBlk-JSdtiw7aFLso2hM
  deepeval/tracing/otel/utils.py,sha256=g8yAzhqbPh1fOKCWkfNekC6AVotLfu1SUcfNMo6zii8,9786
  deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
  deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
- deepeval/tracing/tracing.py,sha256=StvwFEG3MG67n7PBEyDDycdj0myMbP3LMB_FBhaZH-Y,38741
- deepeval/tracing/types.py,sha256=3w5HEI6y4zuzVr8xGEEzDviLZCX_s_pK85qbwnyf1aY,5196
- deepeval/tracing/utils.py,sha256=eTEickbDvRiOu1twNolh4sHnjZF49vqdLgI74BudeTw,6357
+ deepeval/tracing/tracing.py,sha256=Ot0wzUHxhaK4wZov8cgai-i6kiyZUvNzj9MyRhbjZUg,39191
+ deepeval/tracing/types.py,sha256=l_utWKerNlE5H3mOKpeUJLsvpP3cMyjH7HRANNgTmSQ,5306
+ deepeval/tracing/utils.py,sha256=w_kdhuyBCygllnbqLpDdKJqpJo42t3ZMlGhNicV2A8c,6467
  deepeval/utils.py,sha256=EimWDwI1pKCE8vl6kuTnGbGT6ep9zHL5sZ0o-gj49XI,16857
- deepeval-3.5.0.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
- deepeval-3.5.0.dist-info/METADATA,sha256=KBAB5m11q4GAhVwCJBmXZDtaYtKoAO3sQ0vg-ajFRLg,18682
- deepeval-3.5.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
- deepeval-3.5.0.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
- deepeval-3.5.0.dist-info/RECORD,,
+ deepeval-3.5.1.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
+ deepeval-3.5.1.dist-info/METADATA,sha256=KDVwTo18ZlKNfIb_f8oomBUiceMMj7NqvVSKNvN1wbk,18682
+ deepeval-3.5.1.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+ deepeval-3.5.1.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
+ deepeval-3.5.1.dist-info/RECORD,,