deepeval 3.5.0__py3-none-any.whl → 3.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/confident/api.py +2 -0
- deepeval/integrations/langchain/__init__.py +2 -3
- deepeval/integrations/langchain/callback.py +126 -280
- deepeval/integrations/langchain/patch.py +24 -13
- deepeval/integrations/langchain/utils.py +203 -1
- deepeval/integrations/pydantic_ai/patcher.py +220 -185
- deepeval/integrations/pydantic_ai/utils.py +86 -0
- deepeval/metrics/__init__.py +1 -1
- deepeval/metrics/answer_relevancy/template.py +13 -38
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +1 -0
- deepeval/metrics/faithfulness/template.py +17 -27
- deepeval/models/embedding_models/local_embedding_model.py +2 -2
- deepeval/prompt/api.py +24 -2
- deepeval/prompt/prompt.py +141 -17
- deepeval/synthesizer/synthesizer.py +17 -9
- deepeval/tracing/api.py +3 -0
- deepeval/tracing/context.py +3 -1
- deepeval/tracing/tracing.py +12 -2
- deepeval/tracing/types.py +3 -0
- deepeval/tracing/utils.py +6 -2
- deepeval/utils.py +12 -0
- {deepeval-3.5.0.dist-info → deepeval-3.5.2.dist-info}/METADATA +1 -1
- {deepeval-3.5.0.dist-info → deepeval-3.5.2.dist-info}/RECORD +27 -26
- {deepeval-3.5.0.dist-info → deepeval-3.5.2.dist-info}/LICENSE.md +0 -0
- {deepeval-3.5.0.dist-info → deepeval-3.5.2.dist-info}/WHEEL +0 -0
- {deepeval-3.5.0.dist-info → deepeval-3.5.2.dist-info}/entry_points.txt +0 -0
deepeval/metrics/faithfulness/template.py CHANGED

@@ -76,42 +76,31 @@ The 'verdict' key should STRICTLY be either 'yes', 'no', or 'idk', which states
 Provide a 'reason' ONLY if the answer is 'no' or 'idk'.
 The provided claim is drawn from the actual output. Try to provide a correction in the reason using the facts in the retrieval context.
 
-
-IMPORTANT: Please make sure to only return in JSON format, with the 'verdicts' key as a list of JSON objects.
-Example retrieval contexts: "Einstein won the Nobel Prize for his discovery of the photoelectric effect. Einstein won the Nobel Prize in 1968. Einstein is a German Scientist."
-Example claims: ["Barack Obama is a caucasian male.", "Zurich is a city in London", "Einstein won the Nobel Prize for the discovery of the photoelectric effect which may have contributed to his fame.", "Einstein won the Nobel Prize in 1969 for his discovery of the photoelectric effect.", "Einstein was a German chef."]
-
-Example:
+Expected JSON format:
 {{
     "verdicts": [
-        {{
-            "verdict": "idk",
-            "reason": "The claim about Barack Obama is although incorrect, it is not directly addressed in the retrieval context, and so poses no contradiction."
-        }},
-        {{
-            "verdict": "idk",
-            "reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context."
-        }},
        {{
            "verdict": "yes"
        }},
        {{
            "verdict": "no",
-            "reason":
+            "reason": <explanation_for_contradiction>
        }},
        {{
-            "verdict": "
-            "reason":
-        }}
+            "verdict": "idk",
+            "reason": <explanation_for_uncertainty>
+        }}
    ]
 }}
-===== END OF EXAMPLE ======
 
-
-
-
-
-
+Generate ONE verdict per claim - length of 'verdicts' MUST equal number of claims.
+No 'reason' needed for 'yes' verdicts.
+Only use 'no' if retrieval context DIRECTLY CONTRADICTS the claim - never use prior knowledge.
+Use 'idk' for claims not backed up by context OR factually incorrect but non-contradictory - do not assume your knowledge.
+Vague/speculative language in claims (e.g. 'may have', 'possibility') does NOT count as contradiction.
+
+**
+IMPORTANT: Please make sure to only return in JSON format, with the 'verdicts' key as a list of JSON objects.
 **
 
 Retrieval Contexts:

@@ -128,13 +117,14 @@ JSON:
 return f"""Below is a list of Contradictions. It is a list of strings explaining why the 'actual output' does not align with the information presented in the 'retrieval context'. Contradictions happen in the 'actual output', NOT the 'retrieval context'.
 Given the faithfulness score, which is a 0-1 score indicating how faithful the `actual output` is to the retrieval context (higher the better), CONCISELY summarize the contradictions to justify the score.
 
-
-IMPORTANT: Please make sure to only return in JSON format, with the 'reason' key providing the reason.
-Example JSON:
+Expected JSON format:
 {{
     "reason": "The score is <faithfulness_score> because <your_reason>."
 }}
 
+**
+IMPORTANT: Please make sure to only return in JSON format, with the 'reason' key providing the reason.
+
 If there are no contradictions, just say something positive with an upbeat encouraging tone (but don't overdo it otherwise it gets annoying).
 Your reason MUST use information in `contradiction` in your reason.
 Be sure in your reason, as if you know what the actual output is from the contradictions.
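For context, a minimal sketch of exercising the updated template through FaithfulnessMetric (not part of the diff; it assumes an evaluation model is configured, and the test-case strings are placeholders):

    from deepeval.metrics import FaithfulnessMetric
    from deepeval.test_case import LLMTestCase

    # Placeholder test case; the metric builds its verdict prompt from the template above.
    test_case = LLMTestCase(
        input="Who explained the photoelectric effect?",
        actual_output="Einstein explained the photoelectric effect.",
        retrieval_context=["Einstein won the Nobel Prize for explaining the photoelectric effect."],
    )
    metric = FaithfulnessMetric()
    metric.measure(test_case)
    print(metric.score, metric.reason)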
deepeval/models/embedding_models/local_embedding_model.py CHANGED

@@ -41,7 +41,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
 
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
-        response = embedding_model.embeddings.create(
+        response = await embedding_model.embeddings.create(
            model=self.model_name,
            input=[text],
        )

@@ -49,7 +49,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
 
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
-        response = embedding_model.embeddings.create(
+        response = await embedding_model.embeddings.create(
            model=self.model_name,
            input=texts,
        )
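A minimal usage sketch of the now-awaited async path (not part of the diff; it assumes a local OpenAI-compatible embedding endpoint is already configured for LocalEmbeddingModel):

    import asyncio
    from deepeval.models.embedding_models.local_embedding_model import LocalEmbeddingModel

    async def main():
        model = LocalEmbeddingModel()  # assumes local embedding settings are already configured
        vector = await model.a_embed_text("hello world")
        print(len(vector))

    asyncio.run(main())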
deepeval/prompt/api.py CHANGED

@@ -1,4 +1,4 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, AliasChoices
 from enum import Enum
 from typing import List, Optional
 

@@ -20,8 +20,30 @@ class PromptType(Enum):
     LIST = "LIST"
 
 
+class PromptVersion(BaseModel):
+    id: str
+    version: str
+    commit_message: str = Field(
+        serialization_alias="commitMessage",
+        validation_alias=AliasChoices("commit_message", "commitMessage"),
+    )
+
+
+class PromptVersionsHttpResponse(BaseModel):
+    text_versions: Optional[List[PromptVersion]] = Field(
+        None,
+        serialization_alias="textVersions",
+        validation_alias=AliasChoices("text_versions", "textVersions"),
+    )
+    messages_versions: Optional[List[PromptVersion]] = Field(
+        None,
+        serialization_alias="messagesVersions",
+        validation_alias=AliasChoices("messages_versions", "messagesVersions"),
+    )
+
+
 class PromptHttpResponse(BaseModel):
-
+    id: str
     text: Optional[str] = None
     messages: Optional[List[PromptMessage]] = None
     interpolation_type: PromptInterpolationType = Field(
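A small sketch (not part of the diff) of how the AliasChoices validation aliases let these models accept either snake_case or camelCase payloads; the field values are placeholders:

    from deepeval.prompt.api import PromptVersion, PromptVersionsHttpResponse

    payload = {
        "textVersions": [
            {"id": "abc123", "version": "00.00.01", "commitMessage": "initial commit"}
        ]
    }
    parsed = PromptVersionsHttpResponse(**payload)  # camelCase keys validate...
    same = PromptVersionsHttpResponse(
        text_versions=[
            PromptVersion(id="abc123", version="00.00.01", commit_message="initial commit")
        ]
    )  # ...and so do snake_case ones
    print(parsed.text_versions[0].version, same.text_versions[0].version)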
deepeval/prompt/prompt.py CHANGED

@@ -1,11 +1,12 @@
 from enum import Enum
-from typing import Optional, List
+from typing import Optional, List, Dict
 from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
 from rich.console import Console
 import time
 import json
 import os
 from pydantic import BaseModel
+import asyncio
 
 from deepeval.prompt.api import (
     PromptHttpResponse,

@@ -13,11 +14,15 @@ from deepeval.prompt.api import (
     PromptType,
     PromptInterpolationType,
     PromptPushRequest,
+    PromptVersionsHttpResponse,
 )
 from deepeval.prompt.utils import interpolate_text
 from deepeval.confident.api import Api, Endpoints, HttpMethods
-
 from deepeval.constants import HIDDEN_DIR
+from deepeval.utils import (
+    get_or_create_event_loop,
+    get_or_create_general_event_loop,
+)
 
 CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"
 

@@ -63,7 +68,23 @@
         self.alias = alias
         self._text_template = template
         self._messages_template = messages_template
-        self.
+        self._version = None
+        self._polling_tasks: Dict[str, asyncio.Task] = {}
+        self._refresh_map: Dict[str, int] = {}
+
+    @property
+    def version(self):
+        if self._version is not None and self._version != "latest":
+            return self._version
+        versions = self._get_versions()
+        if len(versions) == 0:
+            return "latest"
+        else:
+            return versions[-1].version
+
+    @version.setter
+    def version(self, value):
+        self._version = value
 
     def interpolate(self, **kwargs):
         if self._type == PromptType.TEXT:

@@ -94,6 +115,20 @@
         else:
             raise ValueError(f"Unsupported prompt type: {self._type}")
 
+    def _get_versions(self) -> List:
+        if self.alias is None:
+            raise ValueError(
+                "Prompt alias is not set. Please set an alias to continue."
+            )
+        api = Api()
+        data, _ = api.send_request(
+            method=HttpMethods.GET,
+            endpoint=Endpoints.PROMPTS_VERSIONS_ENDPOINT,
+            url_params={"alias": self.alias},
+        )
+        versions = PromptVersionsHttpResponse(**data)
+        return versions.text_versions or versions.messages_versions or []
+
     def _read_from_cache(
         self, alias: str, version: Optional[str] = None
     ) -> Optional[CachedPrompt]:

@@ -123,8 +158,16 @@
         except Exception as e:
             raise Exception(f"Error reading Prompt cache from disk: {e}")
 
-    def _write_to_cache(
-
+    def _write_to_cache(
+        self,
+        version: Optional[str] = None,
+        text_template: Optional[str] = None,
+        messages_template: Optional[List[PromptMessage]] = None,
+        prompt_version_id: Optional[str] = None,
+        type: Optional[PromptType] = None,
+        interpolation_type: Optional[PromptInterpolationType] = None,
+    ):
+        if not self.alias or not version:
             return
 
         cache_data = {}

@@ -140,14 +183,14 @@
         cache_data[self.alias] = {}
 
         # Cache the prompt
-        cache_data[self.alias][
+        cache_data[self.alias][version] = {
             "alias": self.alias,
-            "version":
-            "template":
-            "messages_template":
-            "prompt_version_id":
-            "type":
-            "interpolation_type":
+            "version": version,
+            "template": text_template,
+            "messages_template": messages_template,
+            "prompt_version_id": prompt_version_id,
+            "type": type,
+            "interpolation_type": interpolation_type,
         }
 
         # Ensure directory exists

@@ -163,12 +206,23 @@
         fallback_to_cache: bool = True,
         write_to_cache: bool = True,
         default_to_cache: bool = True,
+        refresh: Optional[int] = 60,
     ):
+        if refresh:
+            default_to_cache = True
+            write_to_cache = False
         if self.alias is None:
             raise TypeError(
                 "Unable to pull prompt from Confident AI when no alias is provided."
             )
 
+        # Manage background prompt polling
+        loop = get_or_create_general_event_loop()
+        if loop.is_running():
+            loop.create_task(self.create_polling_task(version, refresh))
+        else:
+            loop.run_until_complete(self.create_polling_task(version, refresh))
+
         if default_to_cache:
             try:
                 cached_prompt = self._read_from_cache(self.alias, version)

@@ -200,11 +254,14 @@
            try:
                data, _ = api.send_request(
                    method=HttpMethods.GET,
-                    endpoint=Endpoints.
-
+                    endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+                    url_params={
+                        "alias": self.alias,
+                        "versionId": version or "latest",
+                    },
                )
                response = PromptHttpResponse(
-
+                    id=data["id"],
                    text=data.get("text", None),
                    messages=data.get("messages", None),
                    type=data["type"],

@@ -243,7 +300,7 @@
                self.version = version or "latest"
                self._text_template = response.text
                self._messages_template = response.messages
-                self._prompt_version_id = response.
+                self._prompt_version_id = response.id
                self._type = response.type
                self._interpolation_type = response.interpolation_type
 

@@ -254,7 +311,14 @@
                    description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Done! ({time_taken}s)",
                )
                if write_to_cache:
-                    self._write_to_cache(
+                    self._write_to_cache(
+                        version=version or "latest",
+                        text_template=response.text,
+                        messages_template=response.messages,
+                        prompt_version_id=response.id,
+                        type=response.type,
+                        interpolation_type=response.interpolation_type,
+                    )
 
     def push(
         self,

@@ -300,3 +364,63 @@
             "✅ Prompt successfully pushed to Confident AI! View at "
             f"[link={link}]{link}[/link]"
         )
+
+    ############################################
+    ### Polling
+    ############################################
+
+    async def create_polling_task(
+        self,
+        version: Optional[str],
+        refresh: Optional[int] = 60,
+    ):
+        if version is None:
+            return
+
+        # If polling task doesn't exist, start it
+        polling_task: Optional[asyncio.Task] = self._polling_tasks.get(version)
+        if refresh:
+            self._refresh_map[version] = refresh
+            if not polling_task:
+                self._polling_tasks[version] = asyncio.create_task(
+                    self.poll(version)
+                )
+
+        # If invalid `refresh`, stop the task
+        else:
+            if polling_task:
+                polling_task.cancel()
+                self._polling_tasks.pop(version)
+                self._refresh_map.pop(version)
+
+    async def poll(self, version: Optional[str] = None):
+        api = Api()
+        while True:
+            try:
+                data, _ = api.send_request(
+                    method=HttpMethods.GET,
+                    endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+                    url_params={
+                        "alias": self.alias,
+                        "versionId": version or "latest",
+                    },
+                )
+                response = PromptHttpResponse(
+                    id=data["id"],
+                    text=data.get("text", None),
+                    messages=data.get("messages", None),
+                    type=data["type"],
+                    interpolation_type=data["interpolationType"],
+                )
+                self._write_to_cache(
+                    version=version or "latest",
+                    text_template=response.text,
+                    messages_template=response.messages,
+                    prompt_version_id=response.id,
+                    type=response.type,
+                    interpolation_type=response.interpolation_type,
+                )
+            except Exception as e:
+                pass
+
+            await asyncio.sleep(self._refresh_map[version])
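Taken together, the prompt changes add version listing, a version property, and background polling that re-pulls and re-caches a pinned version every `refresh` seconds. A usage sketch (not part of the diff; the alias, version string, and template variable are placeholders, and a Confident AI API key is assumed):

    from deepeval.prompt import Prompt

    prompt = Prompt(alias="my-prompt-alias")
    # refresh=60 keeps a background task re-fetching this version roughly every 60 seconds;
    # per the diff it also forces default_to_cache=True and write_to_cache=False.
    prompt.pull(version="00.00.03", refresh=60)
    print(prompt.version)
    text = prompt.interpolate(user_input="Hello!")  # placeholder template variable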
deepeval/synthesizer/synthesizer.py CHANGED

@@ -361,7 +361,7 @@ class Synthesizer:
            progress if _progress is None else nullcontext()
        ):
 
-            for
+            for context_index, context in enumerate(contexts):
                # Calculate pbar lengths
                should_style = (
                    self.styling_config.input_format

@@ -381,7 +381,7 @@
                # Add pbars
                pbar_generate_goldens_id = add_pbar(
                    progress,
-                    f"\t⚡ Generating goldens from context #{
+                    f"\t⚡ Generating goldens from context #{context_index}",
                    total=1 + max_goldens_per_context,
                )
                pbar_generate_inputs_id = add_pbar(

@@ -421,7 +421,9 @@
                    progress, pbar_generate_goldens_id, remove=False
                )
 
-                for
+                for input_index, data in enumerate(
+                    qualified_synthetic_inputs
+                ):
                    # Evolve input
                    evolved_input, evolutions_used = self._evolve_input(
                        input=data.input,

@@ -429,7 +431,9 @@
                        num_evolutions=self.evolution_config.num_evolutions,
                        evolutions=self.evolution_config.evolutions,
                        progress=progress,
-                        pbar_evolve_input_id=pbar_evolve_input_ids[
+                        pbar_evolve_input_id=pbar_evolve_input_ids[
+                            input_index
+                        ],
                        remove_pbar=False,
                    )
 

@@ -441,7 +445,9 @@
                        task=self.styling_config.task,
                    )
                    update_pbar(
-                        progress,
+                        progress,
+                        pbar_evolve_input_ids[input_index],
+                        remove=False,
                    )
                    res: SyntheticData = self._generate_schema(
                        prompt,

@@ -455,15 +461,15 @@
                        input=evolved_input,
                        context=context,
                        source_file=(
-                            source_files[
+                            source_files[context_index]
                            if source_files is not None
                            else None
                        ),
                        additional_metadata={
                            "evolutions": evolutions_used,
-                            "synthetic_input_quality": scores[
+                            "synthetic_input_quality": scores[input_index],
                            "context_quality": (
-                                _context_scores[
+                                _context_scores[context_index]
                                if _context_scores is not None
                                else None
                            ),

@@ -480,7 +486,9 @@
                        res = self._generate(prompt)
                        golden.expected_output = res
                        update_pbar(
-                            progress,
+                            progress,
+                            pbar_evolve_input_ids[input_index],
+                            remove=False,
                        )
 
                    goldens.append(golden)
deepeval/tracing/api.py CHANGED

@@ -86,6 +86,9 @@ class BaseApiSpan(BaseModel):
     cost_per_output_token: Optional[float] = Field(
         None, alias="costPerOutputToken"
     )
+    token_intervals: Optional[Dict[str, str]] = Field(
+        None, alias="tokenIntervals"
+    )
 
     ## evals
     metric_collection: Optional[str] = Field(None, alias="metricCollection")
deepeval/tracing/context.py CHANGED

@@ -4,7 +4,6 @@ from contextvars import ContextVar
 from deepeval.tracing.types import BaseSpan, Trace
 from deepeval.test_case.llm_test_case import ToolCall, LLMTestCase
 from deepeval.tracing.types import LlmSpan, RetrieverSpan
-from deepeval.metrics import BaseMetric
 from deepeval.prompt.prompt import Prompt
 
 current_span_context: ContextVar[Optional[BaseSpan]] = ContextVar(

@@ -117,6 +116,7 @@ def update_llm_span(
     output_token_count: Optional[float] = None,
     cost_per_input_token: Optional[float] = None,
     cost_per_output_token: Optional[float] = None,
+    token_intervals: Optional[Dict[float, str]] = None,
     prompt: Optional[Prompt] = None,
 ):
     current_span = current_span_context.get()

@@ -132,6 +132,8 @@
     if cost_per_input_token:
         current_span.cost_per_input_token = cost_per_input_token
     if cost_per_output_token:
         current_span.cost_per_output_token = cost_per_output_token
+    if token_intervals:
+        current_span.token_intervals = token_intervals
     if prompt:
         current_span.prompt = prompt
 
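A sketch (not part of the diff) of recording per-token timing on the current LLM span. It assumes the call runs inside an active LLM span created by deepeval's tracing and that the remaining update_llm_span parameters keep their defaults; keys are time.perf_counter() floats that tracing.py later converts to ISO timestamps:

    import time
    from deepeval.tracing.context import update_llm_span

    def collect_stream(chunks):
        token_intervals = {}
        pieces = []
        for chunk in chunks:
            token_intervals[time.perf_counter()] = chunk  # perf_counter float -> token text
            pieces.append(chunk)
        # Only takes effect when called inside an active LLM span.
        update_llm_span(token_intervals=token_intervals)
        return "".join(pieces)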
deepeval/tracing/tracing.py CHANGED

@@ -114,7 +114,7 @@ class TraceManager:
            self._print_trace_status(
                message=f"WARNING: Exiting with {queue_size + in_flight} abaonded trace(s).",
                trace_worker_status=TraceWorkerStatus.WARNING,
-                description=f"Set {CONFIDENT_TRACE_FLUSH}=
+                description=f"Set {CONFIDENT_TRACE_FLUSH}=1 as an environment variable to flush remaining traces to Confident AI.",
            )
 
     def mask(self, data: Any):

@@ -314,7 +314,7 @@
                env_text,
                message + ":",
                description,
-                f"\nTo disable dev logging, set {CONFIDENT_TRACE_VERBOSE}=
+                f"\nTo disable dev logging, set {CONFIDENT_TRACE_VERBOSE}=0 as an environment variable.",
            )
        else:
            console.print(message_prefix, env_text, message)

@@ -717,6 +717,16 @@
            api_span.input_token_count = span.input_token_count
            api_span.output_token_count = span.output_token_count
 
+            processed_token_intervals = {}
+            if span.token_intervals:
+                for key, value in span.token_intervals.items():
+                    time = to_zod_compatible_iso(
+                        perf_counter_to_datetime(key),
+                        microsecond_precision=True,
+                    )
+                    processed_token_intervals[time] = value
+                api_span.token_intervals = processed_token_intervals
+
        return api_span
 
 
deepeval/tracing/types.py CHANGED

@@ -102,6 +102,9 @@ class LlmSpan(BaseSpan):
     cost_per_output_token: Optional[float] = Field(
         None, serialization_alias="costPerOutputToken"
     )
+    token_intervals: Optional[Dict[float, str]] = Field(
+        None, serialization_alias="tokenTimes"
+    )
 
     # for serializing `prompt`
     model_config = {"arbitrary_types_allowed": True}
deepeval/tracing/utils.py CHANGED

@@ -100,10 +100,14 @@ def make_json_serializable(obj):
     return _serialize(obj)
 
 
-def to_zod_compatible_iso(
+def to_zod_compatible_iso(
+    dt: datetime, microsecond_precision: bool = False
+) -> str:
     return (
         dt.astimezone(timezone.utc)
-        .isoformat(
+        .isoformat(
+            timespec="microseconds" if microsecond_precision else "milliseconds"
+        )
         .replace("+00:00", "Z")
     )
 
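A quick sketch of the widened timestamp precision (the datetime value is arbitrary):

    from datetime import datetime, timezone
    from deepeval.tracing.utils import to_zod_compatible_iso

    dt = datetime(2024, 1, 2, 3, 4, 5, 123456, tzinfo=timezone.utc)
    print(to_zod_compatible_iso(dt))                              # 2024-01-02T03:04:05.123Z
    print(to_zod_compatible_iso(dt, microsecond_precision=True))  # 2024-01-02T03:04:05.123456Z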
deepeval/utils.py CHANGED

@@ -148,6 +148,18 @@ def get_or_create_event_loop() -> asyncio.AbstractEventLoop:
     return loop
 
 
+def get_or_create_general_event_loop() -> asyncio.AbstractEventLoop:
+    try:
+        loop = asyncio.get_event_loop()
+        if loop.is_closed():
+            raise RuntimeError
+        return loop
+    except RuntimeError:
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        return loop
+
+
 def set_should_skip_on_missing_params(yes: bool):
     s = get_settings()
     with s.edit(persist=False):
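A sketch (not part of the diff) of the new helper, which reuses the current event loop when it is open and otherwise creates and installs a fresh one; this is what Prompt.pull relies on to schedule its polling task:

    import asyncio
    from deepeval.utils import get_or_create_general_event_loop

    async def demo():
        await asyncio.sleep(0)
        return "ok"

    loop = get_or_create_general_event_loop()
    if not loop.is_running():
        print(loop.run_until_complete(demo()))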