deepeval 3.6.2__py3-none-any.whl → 3.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/confident/api.py +1 -0
- deepeval/metrics/g_eval/g_eval.py +3 -2
- deepeval/metrics/tool_correctness/tool_correctness.py +12 -7
- deepeval/models/llms/amazon_bedrock_model.py +3 -31
- deepeval/models/llms/openai_model.py +0 -1
- deepeval/models/llms/utils.py +22 -0
- deepeval/prompt/api.py +2 -0
- deepeval/prompt/prompt.py +355 -148
- deepeval/tracing/otel/utils.py +52 -35
- {deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/METADATA +1 -1
- {deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/RECORD +15 -15
- {deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/WHEEL +0 -0
- {deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED

@@ -1 +1 @@
-__version__: str = "3.6.2"
+__version__: str = "3.6.4"

deepeval/confident/api.py
CHANGED

@@ -90,6 +90,7 @@ class Endpoints(Enum):
     TRACES_ENDPOINT = "/v1/traces"
     ANNOTATIONS_ENDPOINT = "/v1/annotations"
     PROMPTS_VERSION_ID_ENDPOINT = "/v1/prompts/:alias/versions/:versionId"
+    PROMPTS_LABEL_ENDPOINT = "/v1/prompts/:alias/labels/:label"
     PROMPTS_ENDPOINT = "/v1/prompts"
     PROMPTS_VERSIONS_ENDPOINT = "/v1/prompts/:alias/versions"
     SIMULATE_ENDPOINT = "/v1/simulate"

deepeval/metrics/g_eval/g_eval.py
CHANGED

@@ -97,7 +97,8 @@ class GEval(BaseMetric):
             test_case, _additional_context=_additional_context
         )
         self.score = (
-            float(g_score)
+            (float(g_score) - self.score_range[0])
+            / self.score_range_span
            if not self.strict_mode
            else int(g_score)
        )
@@ -140,7 +141,7 @@ class GEval(BaseMetric):
             test_case, _additional_context=_additional_context
         )
         self.score = (
-            float(g_score) / self.score_range_span
+            (float(g_score) - self.score_range[0]) / self.score_range_span
            if not self.strict_mode
            else int(g_score)
        )

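Note: both the synchronous and asynchronous paths now apply the same min-max normalization, shifting the raw judge score by the lower bound of `score_range` before dividing by `score_range_span`, so scores within the configured range land in [0, 1]. A minimal sketch of the arithmetic (the helper name is illustrative, not deepeval code), assuming `score_range = (lower, upper)` and `score_range_span = upper - lower`:

    def normalize_g_score(g_score: float, score_range: tuple) -> float:
        lower, upper = score_range
        # mirrors (float(g_score) - self.score_range[0]) / self.score_range_span above
        return (float(g_score) - lower) / (upper - lower)

    # A judge score of 7 on a 1-10 rubric now normalizes to (7 - 1) / 9,
    # where dividing by the span alone would have given 7 / 9.
    assert abs(normalize_g_score(7, (1, 10)) - 6 / 9) < 1e-9
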
deepeval/metrics/tool_correctness/tool_correctness.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import List,
+from typing import List, Dict

 from deepeval.metrics.indicator import metric_progress_indicator
 from deepeval.metrics.utils import (
@@ -152,14 +152,19 @@ class ToolCorrectnessMetric(BaseMetric):

     # Calculate score
     def _calculate_score(self):
-
-        if len(self.expected_tools) == 0:
-            score = 1.0 if len(self.tools_called) == 0 else 0.0
-        elif self.should_exact_match:
+        if self.should_exact_match:
             score = self._calculate_exact_match_score()
         elif self.should_consider_ordering:
             _, weighted_length = self._compute_weighted_lcs()
-            score = weighted_length / len(self.expected_tools)
+            if (
+                len(self.tools_called) == len(self.expected_tools)
+                and len(self.expected_tools) == 0
+            ):
+                score = 1.0
+            elif len(self.expected_tools) == 0:
+                score = 0.0
+            else:
+                score = weighted_length / len(self.expected_tools)
         else:
             score = self._calculate_non_exact_match_score()
         return 0 if self.strict_mode and score < self.threshold else score
@@ -294,7 +299,7 @@ class ToolCorrectnessMetric(BaseMetric):
     def is_successful(self) -> bool:
         try:
             self.success = self.score >= self.threshold
-        except:
+        except (AttributeError, TypeError):
             self.success = False
         return self.success

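Note: the empty-`expected_tools` short-circuit moved inside the ordering branch. Under `should_consider_ordering`, an empty expected list now scores 1.0 only when no tools were called and 0.0 otherwise; the general case still divides the weighted LCS length by the number of expected tools. A hypothetical distillation of that branch (not deepeval code):

    def ordering_score(tools_called: list, expected_tools: list, weighted_length: float) -> float:
        # Mirrors the new elif branch of _calculate_score above.
        if len(tools_called) == len(expected_tools) == 0:
            return 1.0  # nothing expected, nothing called
        if len(expected_tools) == 0:
            return 0.0  # nothing expected, yet tools were called
        return weighted_length / len(expected_tools)
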
deepeval/models/llms/amazon_bedrock_model.py
CHANGED

@@ -9,7 +9,7 @@ from deepeval.models.retry_policy import (
     sdk_retries_for,
 )
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.llms.utils import trim_and_load_json, safe_asyncio_run
 from deepeval.constants import ProviderSlug as PS

 # check aiobotocore availability
@@ -40,7 +40,6 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
         region_name: str,
         aws_access_key_id: Optional[str] = None,
         aws_secret_access_key: Optional[str] = None,
-        temperature: float = 0,
         input_token_cost: float = 0,
         output_token_cost: float = 0,
         generation_kwargs: Optional[Dict] = None,
@@ -53,13 +52,9 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
         self.region_name = region_name
         self.aws_access_key_id = aws_access_key_id
         self.aws_secret_access_key = aws_secret_access_key
-        self.temperature = temperature
         self.input_token_cost = input_token_cost
         self.output_token_cost = output_token_cost

-        if self.temperature < 0:
-            raise ValueError("Temperature must be >= 0.")
-
         # prepare aiobotocore session, config, and async exit stack
         self._session = get_session()
         self._exit_stack = AsyncExitStack()
@@ -75,7 +70,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
-        return
+        return safe_asyncio_run(self.a_generate(prompt, schema))

     @retry_bedrock
     async def a_generate(
@@ -142,34 +137,11 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     ###############################################

     def get_converse_request_body(self, prompt: str) -> dict:
-        # Inline parameter translation with defaults
-        param_mapping = {
-            "max_tokens": "maxTokens",
-            "top_p": "topP",
-            "top_k": "topK",
-            "stop_sequences": "stopSequences",
-        }
-
-        # Start with defaults for required parameters
-        translated_kwargs = {
-            "maxTokens": self.generation_kwargs.get("max_tokens", 1000),
-            "topP": self.generation_kwargs.get("top_p", 0),
-        }
-
-        # Add any other parameters from generation_kwargs
-        for key, value in self.generation_kwargs.items():
-            if key not in [
-                "max_tokens",
-                "top_p",
-            ]:  # Skip already handled defaults
-                aws_key = param_mapping.get(key, key)
-                translated_kwargs[aws_key] = value

         return {
             "messages": [{"role": "user", "content": [{"text": prompt}]}],
             "inferenceConfig": {
-
-                **translated_kwargs,
+                **self.generation_kwargs,
             },
         }
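Note: two behavioral consequences fall out of this file's changes. First, the constructor no longer accepts or validates `temperature`. Second, with the snake_case-to-camelCase translation and its `maxTokens`/`topP` defaults removed, `generation_kwargs` is spread verbatim into the Converse API's `inferenceConfig`, so callers should pass Bedrock's native camelCase keys. A hedged construction sketch (the `model_id` parameter name and all values are assumptions, not confirmed by this diff):

    from deepeval.models.llms.amazon_bedrock_model import AmazonBedrockModel

    model = AmazonBedrockModel(
        model_id="anthropic.claude-3-5-sonnet-20240620-v1:0",  # assumed parameter name and model id
        region_name="us-east-1",
        generation_kwargs={"maxTokens": 1000, "topP": 0.9},  # camelCase keys, passed through as-is
    )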
|
deepeval/models/llms/utils.py
CHANGED

@@ -1,6 +1,7 @@
 from typing import Dict
 import re
 import json
+import asyncio


 def trim_and_load_json(
@@ -20,3 +21,24 @@ def trim_and_load_json(
         raise ValueError(error_str)
     except Exception as e:
         raise Exception(f"An unexpected error occurred: {str(e)}")
+
+
+def safe_asyncio_run(coro):
+    """
+    Run an async coroutine safely.
+    Falls back to run_until_complete if already in a running event loop.
+    """
+    try:
+        return asyncio.run(coro)
+    except RuntimeError:
+        try:
+            loop = asyncio.get_event_loop()
+            if loop.is_running():
+                future = asyncio.ensure_future(coro)
+                return loop.run_until_complete(future)
+            else:
+                return loop.run_until_complete(coro)
+        except Exception as inner_e:
+            raise
+    except Exception as e:
+        raise

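Note: `safe_asyncio_run` is what the Bedrock model's synchronous `generate` now delegates to: it tries `asyncio.run` first and falls back to the current loop only on `RuntimeError`. A minimal usage sketch from plain synchronous code, where no loop is running and the `asyncio.run` path is taken:

    import asyncio
    from deepeval.models.llms.utils import safe_asyncio_run

    async def add(a: int, b: int) -> int:
        await asyncio.sleep(0)
        return a + b

    print(safe_asyncio_run(add(1, 2)))  # 3

One caveat: in standard asyncio, calling `loop.run_until_complete` on a loop that is already running itself raises `RuntimeError`, so the running-loop fallback above is best-effort rather than a general re-entrancy mechanism.
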
deepeval/prompt/api.py
CHANGED

@@ -45,6 +45,8 @@ class PromptVersionsHttpResponse(BaseModel):

 class PromptHttpResponse(BaseModel):
     id: str
+    version: str
+    label: Optional[str] = None
     text: Optional[str] = None
     messages: Optional[List[PromptMessage]] = None
     interpolation_type: PromptInterpolationType = Field(

deepeval/prompt/prompt.py
CHANGED

@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Optional, List, Dict
+from typing import Literal, Optional, List, Dict
 from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
 from rich.console import Console
 import time
@@ -7,6 +7,8 @@ import json
 import os
 from pydantic import BaseModel
 import asyncio
+import portalocker
+import threading

 from deepeval.prompt.api import (
     PromptHttpResponse,
@@ -19,12 +21,38 @@ from deepeval.prompt.api import (
 from deepeval.prompt.utils import interpolate_text
 from deepeval.confident.api import Api, Endpoints, HttpMethods
 from deepeval.constants import HIDDEN_DIR
-from deepeval.utils import (
-    get_or_create_event_loop,
-    get_or_create_general_event_loop,
-)

 CACHE_FILE_NAME = f"{HIDDEN_DIR}/.deepeval-prompt-cache.json"
+VERSION_CACHE_KEY = "version"
+LABEL_CACHE_KEY = "label"
+
+# Global background event loop for polling
+_polling_loop: Optional[asyncio.AbstractEventLoop] = None
+_polling_thread: Optional[threading.Thread] = None
+_polling_loop_lock = threading.Lock()
+
+
+def _get_or_create_polling_loop() -> asyncio.AbstractEventLoop:
+    """Get or create a background event loop for polling that runs in a daemon thread."""
+    global _polling_loop, _polling_thread
+
+    with _polling_loop_lock:
+        if _polling_loop is None or not _polling_loop.is_running():
+
+            def run_loop():
+                global _polling_loop
+                _polling_loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(_polling_loop)
+                _polling_loop.run_forever()
+
+            _polling_thread = threading.Thread(target=run_loop, daemon=True)
+            _polling_thread.start()
+
+            # Wait for loop to be ready
+            while _polling_loop is None:
+                time.sleep(0.01)
+
+    return _polling_loop


 class CustomEncoder(json.JSONEncoder):

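Note: the module replaces the removed `get_or_create_event_loop` helpers with a single process-wide event loop running in a daemon thread, so background refreshes outlive any one synchronous `pull()` call; coroutines are handed to it with `asyncio.run_coroutine_threadsafe` (see `pull` below). A generic illustration of the pattern (not deepeval code):

    import asyncio
    import threading

    loop = asyncio.new_event_loop()
    threading.Thread(target=loop.run_forever, daemon=True).start()

    async def tick() -> str:
        await asyncio.sleep(0.1)
        return "refreshed"

    # Schedule work on the background loop from the main thread:
    future = asyncio.run_coroutine_threadsafe(tick(), loop)
    print(future.result(timeout=1))  # "refreshed"
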
@@ -39,6 +67,7 @@ class CustomEncoder(json.JSONEncoder):
 class CachedPrompt(BaseModel):
     alias: str
     version: str
+    label: Optional[str] = None
     template: Optional[str]
     messages_template: Optional[List[PromptMessage]]
     prompt_version_id: str
@@ -50,6 +79,7 @@ class CachedPrompt(BaseModel):


 class Prompt:
+    label: Optional[str] = None
     _prompt_version_id: Optional[str] = None
     _type: Optional[PromptType] = None
     _interpolation_type: Optional[PromptInterpolationType] = None
@@ -73,13 +103,24 @@ class Prompt:
         self._text_template = template
         self._messages_template = messages_template
         self._version = None
-        self._polling_tasks: Dict[str, asyncio.Task] = {}
-        self._refresh_map: Dict[str, int] = {}
+        self._polling_tasks: Dict[str, Dict[str, asyncio.Task]] = {}
+        self._refresh_map: Dict[str, Dict[str, int]] = {}
+        self._lock = (
+            threading.Lock()
+        )  # Protect instance attributes from race conditions
         if template:
             self._type = PromptType.TEXT
         elif messages_template:
             self._type = PromptType.LIST

+    def __del__(self):
+        """Cleanup polling tasks when instance is destroyed"""
+        try:
+            self._stop_polling()
+        except Exception:
+            # Suppress exceptions during cleanup to avoid issues in interpreter shutdown
+            pass
+
     @property
     def version(self):
         if self._version is not None and self._version != "latest":
@@ -95,33 +136,37 @@ class Prompt:
         self._version = value

     def interpolate(self, **kwargs):
-        if self._type == PromptType.TEXT:
-            if self._text_template is None:
+        with self._lock:
+            prompt_type = self._type
+            text_template = self._text_template
+            messages_template = self._messages_template
+            interpolation_type = self._interpolation_type
+
+        if prompt_type == PromptType.TEXT:
+            if text_template is None:
                 raise TypeError(
                     "Unable to interpolate empty prompt template. Please pull a prompt from Confident AI or set template manually to continue."
                 )

-            return interpolate_text(
-                self._interpolation_type, self._text_template, **kwargs
-            )
+            return interpolate_text(interpolation_type, text_template, **kwargs)

-        elif self._type == PromptType.LIST:
-            if self._messages_template is None:
+        elif prompt_type == PromptType.LIST:
+            if messages_template is None:
                 raise TypeError(
                     "Unable to interpolate empty prompt template messages. Please pull a prompt from Confident AI or set template manually to continue."
                 )

             interpolated_messages = []
-            for message in self._messages_template:
+            for message in messages_template:
                 interpolated_content = interpolate_text(
-                    self._interpolation_type, message.content, **kwargs
+                    interpolation_type, message.content, **kwargs
                 )
                 interpolated_messages.append(
                     {"role": message.role, "content": interpolated_content}
                 )
             return interpolated_messages
         else:
-            raise ValueError(f"Unsupported prompt type: {self._type}")
+            raise ValueError(f"Unsupported prompt type: {prompt_type}")

     def _get_versions(self) -> List:
         if self.alias is None:
@@ -138,111 +183,205 @@ class Prompt:
         return versions.text_versions or versions.messages_versions or []

     def _read_from_cache(
-        self,
+        self,
+        alias: str,
+        version: Optional[str] = None,
+        label: Optional[str] = None,
     ) -> Optional[CachedPrompt]:
         if not os.path.exists(CACHE_FILE_NAME):
-            return None
+            return None

         try:
-            with open(CACHE_FILE_NAME, "r") as f:
+            # Use shared lock for reading to allow concurrent reads
+            with portalocker.Lock(
+                CACHE_FILE_NAME,
+                mode="r",
+                flags=portalocker.LOCK_SH | portalocker.LOCK_NB,
+            ) as f:
                 cache_data = json.load(f)

             if alias in cache_data:
                 if version:
-                    if version in cache_data[alias]:
-                        return CachedPrompt(
-                            **cache_data[alias][version]
+                    if (
+                        VERSION_CACHE_KEY in cache_data[alias]
+                        and version in cache_data[alias][VERSION_CACHE_KEY]
+                    ):
+                        return CachedPrompt(
+                            **cache_data[alias][VERSION_CACHE_KEY][version]
                         )
+                elif label:
+                    if (
+                        LABEL_CACHE_KEY in cache_data[alias]
+                        and label in cache_data[alias][LABEL_CACHE_KEY]
+                    ):
+                        return CachedPrompt(
+                            **cache_data[alias][LABEL_CACHE_KEY][label]
+                        )
+            return None
+        except (portalocker.exceptions.LockException, Exception):
+            # If cache is locked, corrupted or unreadable, return None and let it fetch from API
+            return None

     def _write_to_cache(
         self,
-        version: str,
+        cache_key: Literal[VERSION_CACHE_KEY, LABEL_CACHE_KEY],
+        version: str,
+        label: Optional[str] = None,
         text_template: Optional[str] = None,
         messages_template: Optional[List[PromptMessage]] = None,
         prompt_version_id: Optional[str] = None,
         type: Optional[PromptType] = None,
         interpolation_type: Optional[PromptInterpolationType] = None,
     ):
-        if not self.alias
+        if not self.alias:
             return

-        cache_data = {}
-        if os.path.exists(CACHE_FILE_NAME):
-            try:
-                with open(CACHE_FILE_NAME, "r") as f:
-                    cache_data = json.load(f)
-            except Exception:
-                cache_data = {}
-
-        # Ensure the cache structure is initialized properly
-        if self.alias not in cache_data:
-            cache_data[self.alias] = {}
-
-        # Cache the prompt
-        cache_data[self.alias][version] = {
-            "alias": self.alias,
-            "version": version,
-            "template": text_template,
-            "messages_template": messages_template,
-            "prompt_version_id": prompt_version_id,
-            "type": type,
-            "interpolation_type": interpolation_type,
-        }
-
         # Ensure directory exists
         os.makedirs(HIDDEN_DIR, exist_ok=True)

-        with open(CACHE_FILE_NAME, "w") as f:
-            json.dump(cache_data, f, cls=CustomEncoder)
+        try:
+            # Use r+ mode if file exists, w mode if it doesn't
+            mode = "r+" if os.path.exists(CACHE_FILE_NAME) else "w"
+
+            with portalocker.Lock(
+                CACHE_FILE_NAME,
+                mode=mode,
+                flags=portalocker.LOCK_EX,
+            ) as f:
+                # Read existing cache data if file exists and has content
+                cache_data = {}
+                if mode == "r+":
+                    try:
+                        f.seek(0)
+                        content = f.read()
+                        if content:
+                            cache_data = json.loads(content)
+                    except (json.JSONDecodeError, Exception):
+                        cache_data = {}
+
+                # Ensure the cache structure is initialized properly
+                if self.alias not in cache_data:
+                    cache_data[self.alias] = {}
+
+                if cache_key not in cache_data[self.alias]:
+                    cache_data[self.alias][cache_key] = {}
+
+                # Cache the prompt
+                cached_entry = {
+                    "alias": self.alias,
+                    "version": version,
+                    "label": label,
+                    "template": text_template,
+                    "messages_template": messages_template,
+                    "prompt_version_id": prompt_version_id,
+                    "type": type,
+                    "interpolation_type": interpolation_type,
+                }
+
+                if cache_key == VERSION_CACHE_KEY:
+                    cache_data[self.alias][cache_key][version] = cached_entry
+                else:
+                    cache_data[self.alias][cache_key][label] = cached_entry
+
+                # Write back to cache file
+                f.seek(0)
+                f.truncate()
+                json.dump(cache_data, f, cls=CustomEncoder)
+        except portalocker.exceptions.LockException:
+            # If we can't acquire the lock, silently skip caching
+            pass
+        except Exception:
+            # If any other error occurs during caching, silently skip
+            pass
+
+    def _load_from_cache_with_progress(
+        self,
+        progress: Progress,
+        task_id: int,
+        start_time: float,
+        version: Optional[str] = None,
+        label: Optional[str] = None,
+    ):
+        """
+        Load prompt from cache and update progress bar.
+        Raises if unable to load from cache.
+        """
+        cached_prompt = self._read_from_cache(
+            self.alias, version=version, label=label
+        )
+        if not cached_prompt:
+            raise ValueError("Unable to fetch prompt and load from cache")
+
+        with self._lock:
+            self.version = cached_prompt.version
+            self.label = cached_prompt.label
+            self._text_template = cached_prompt.template
+            self._messages_template = cached_prompt.messages_template
+            self._prompt_version_id = cached_prompt.prompt_version_id
+            self._type = PromptType(cached_prompt.type)
+            self._interpolation_type = PromptInterpolationType(
+                cached_prompt.interpolation_type
+            )
+
+        end_time = time.perf_counter()
+        time_taken = format(end_time - start_time, ".2f")
+        progress.update(
+            task_id,
+            description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Loaded from cache! ({time_taken}s)",
+        )

     def pull(
         self,
         version: Optional[str] = None,
+        label: Optional[str] = None,
         fallback_to_cache: bool = True,
         write_to_cache: bool = True,
         default_to_cache: bool = True,
         refresh: Optional[int] = 60,
     ):
+        should_write_on_first_fetch = False
         if refresh:
+            # Check if we need to bootstrap the cache
+            cached_prompt = self._read_from_cache(
+                self.alias, version=version, label=label
+            )
+            if cached_prompt is None:
+                # No cache exists, so we should write after fetching to bootstrap
+                should_write_on_first_fetch = True
+                write_to_cache = False  # Polling will handle subsequent writes
+
         if self.alias is None:
             raise TypeError(
                 "Unable to pull prompt from Confident AI when no alias is provided."
             )

         # Manage background prompt polling
+        if refresh:
+            loop = _get_or_create_polling_loop()
+            asyncio.run_coroutine_threadsafe(
+                self.create_polling_task(version, label, refresh), loop
+            )

         if default_to_cache:
             try:
-                cached_prompt = self._read_from_cache(
+                cached_prompt = self._read_from_cache(
+                    self.alias, version=version, label=label
+                )
                 if cached_prompt:
-                    self.version = cached_prompt.version
-                    self._text_template = cached_prompt.template
-                    self._messages_template = cached_prompt.messages_template
-                    self._prompt_version_id = cached_prompt.prompt_version_id
-                    self._type = PromptType(cached_prompt.type)
-                    self._interpolation_type = PromptInterpolationType(
-                        cached_prompt.interpolation_type
-                    )
+                    with self._lock:
+                        self.version = cached_prompt.version
+                        self.label = cached_prompt.label
+                        self._text_template = cached_prompt.template
+                        self._messages_template = (
+                            cached_prompt.messages_template
+                        )
+                        self._prompt_version_id = (
+                            cached_prompt.prompt_version_id
+                        )
+                        self._type = PromptType(cached_prompt.type)
+                        self._interpolation_type = PromptInterpolationType(
+                            cached_prompt.interpolation_type
+                        )
                     return
             except:
                 pass

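Note: cache access is now file-locked via portalocker: reads take a shared, non-blocking lock so concurrent readers never queue behind one another, writes take an exclusive lock and rewrite the file in place, and both paths swallow lock failures (falling back to the API, or skipping the cache write). A generic sketch of that locking pattern (the file path is illustrative):

    import json
    import portalocker

    # Writer: exclusive lock, then rewrite the file in place.
    with portalocker.Lock("example-cache.json", mode="w", flags=portalocker.LOCK_EX) as f:
        json.dump({"alias": {}}, f)

    # Reader: shared lock; LOCK_NB raises instead of blocking if a writer holds the file.
    with portalocker.Lock(
        "example-cache.json", mode="r", flags=portalocker.LOCK_SH | portalocker.LOCK_NB
    ) as f:
        data = json.load(f)
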
@@ -254,63 +393,66 @@ class Prompt:
             TextColumn("[progress.description]{task.description}"),
             transient=False,
         ) as progress:
+            HINT_TEXT = (
+                f"version='{version or 'latest'}'"
+                if not label
+                else f"label='{label}'"
+            )
             task_id = progress.add_task(
-                f"Pulling [rgb(106,0,255)]'{self.alias}' (
+                f"Pulling [rgb(106,0,255)]'{self.alias}' ({HINT_TEXT})[/rgb(106,0,255)] from Confident AI...",
                 total=100,
             )
+
             start_time = time.perf_counter()
             try:
-                data, _ = api.send_request(
-                    method=HttpMethods.GET,
-                    endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
-                    url_params={
-                        "alias": self.alias,
-                        "versionId": version or "latest",
-                    },
-                )
+                if label:
+                    data, _ = api.send_request(
+                        method=HttpMethods.GET,
+                        endpoint=Endpoints.PROMPTS_LABEL_ENDPOINT,
+                        url_params={
+                            "alias": self.alias,
+                            "label": label,
+                        },
+                    )
+                else:
+                    data, _ = api.send_request(
+                        method=HttpMethods.GET,
+                        endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+                        url_params={
+                            "alias": self.alias,
+                            "versionId": version or "latest",
+                        },
+                    )
+
                 response = PromptHttpResponse(
                     id=data["id"],
+                    version=data.get("version", None),
+                    label=data.get("label", None),
                     text=data.get("text", None),
                     messages=data.get("messages", None),
                     type=data["type"],
                     interpolation_type=data["interpolationType"],
                 )
-            except:
-                end_time = time.perf_counter()
-                time_taken = format(end_time - start_time, ".2f")
-                progress.update(
-                    task_id,
-                    description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Loaded from cache! ({time_taken}s)",
-                )
-                return
-            except:
-                raise
-
-            self.version = version or "latest"
-            self._text_template = response.text
-            self._messages_template = response.messages
-            self._prompt_version_id = response.id
-            self._type = response.type
-            self._interpolation_type = response.interpolation_type
+            except Exception:
+                if fallback_to_cache:
+                    self._load_from_cache_with_progress(
+                        progress,
+                        task_id,
+                        start_time,
+                        version=version,
+                        label=label,
+                    )
+                    return
+                raise
+
+            with self._lock:
+                self.version = response.version
+                self.label = response.label
+                self._text_template = response.text
+                self._messages_template = response.messages
+                self._prompt_version_id = response.id
+                self._type = response.type
+                self._interpolation_type = response.interpolation_type

             end_time = time.perf_counter()
             time_taken = format(end_time - start_time, ".2f")
@@ -318,9 +460,12 @@ class Prompt:
                 task_id,
                 description=f"{progress.tasks[task_id].description}[rgb(25,227,160)]Done! ({time_taken}s)",
             )
-            if write_to_cache:
+            # Write to cache if explicitly requested OR if we need to bootstrap cache for refresh mode
+            if write_to_cache or should_write_on_first_fetch:
                 self._write_to_cache(
-                    version=version or "latest",
+                    cache_key=LABEL_CACHE_KEY if label else VERSION_CACHE_KEY,
+                    version=response.version,
+                    label=response.label,
                     text_template=response.text,
                     messages_template=response.messages,
                     prompt_version_id=response.id,

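Note: `pull` can now resolve a prompt either by version (the existing `/versions/:versionId` route) or by the new label route, and the server-reported `version` and `label` are what get stored on the instance and in the cache. A hedged usage sketch (the alias and label values are illustrative, and the top-level import path is assumed from the package layout):

    from deepeval.prompt import Prompt

    prompt = Prompt(alias="my-prompt")
    prompt.pull(label="production", refresh=60)  # resolves via /v1/prompts/:alias/labels/:label
    print(prompt.version, prompt.label)
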
@@ -380,55 +525,117 @@ class Prompt:
     async def create_polling_task(
         self,
         version: Optional[str],
+        label: Optional[str],
         refresh: Optional[int] = 60,
     ):
-        if version is None:
-            return
-
         # If polling task doesn't exist, start it
-        polling_task: Optional[asyncio.Task] = self._polling_tasks.get(version)
+        CACHE_KEY = LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
+        cache_value = label if label else version
+
+        # Initialize nested dicts if they don't exist
+        if CACHE_KEY not in self._polling_tasks:
+            self._polling_tasks[CACHE_KEY] = {}
+        if CACHE_KEY not in self._refresh_map:
+            self._refresh_map[CACHE_KEY] = {}
+
+        polling_task: Optional[asyncio.Task] = self._polling_tasks[
+            CACHE_KEY
+        ].get(cache_value)

         if refresh:
-            self._refresh_map[version] = refresh
+            self._refresh_map[CACHE_KEY][cache_value] = refresh
             if not polling_task:
-                self._polling_tasks[version] = asyncio.create_task(
-                    self.poll(version)
+                self._polling_tasks[CACHE_KEY][cache_value] = (
+                    asyncio.create_task(self.poll(version, label))
                 )

         # If invalid `refresh`, stop the task
         else:
             if polling_task:
                 polling_task.cancel()
-                self._polling_tasks.pop(version)
-
+                if cache_value in self._polling_tasks[CACHE_KEY]:
+                    self._polling_tasks[CACHE_KEY].pop(cache_value)
+                if cache_value in self._refresh_map[CACHE_KEY]:
+                    self._refresh_map[CACHE_KEY].pop(cache_value)
+
+    async def poll(
+        self,
+        version: Optional[str] = None,
+        label: Optional[str] = None,
+    ):
+        CACHE_KEY = LABEL_CACHE_KEY if label else VERSION_CACHE_KEY
+        cache_value = label if label else version

-    async def poll(self, version: Optional[str] = None):
-        api = Api()
         while True:
+            await asyncio.sleep(self._refresh_map[CACHE_KEY][cache_value])
+
+            api = Api()
             try:
-                data, _ = api.send_request(
-                    method=HttpMethods.GET,
-                    endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
-                    url_params={
-                        "alias": self.alias,
-                        "versionId": version or "latest",
-                    },
-                )
+                if label:
+                    data, _ = api.send_request(
+                        method=HttpMethods.GET,
+                        endpoint=Endpoints.PROMPTS_LABEL_ENDPOINT,
+                        url_params={
+                            "alias": self.alias,
+                            "label": label,
+                        },
+                    )
+                else:
+                    data, _ = api.send_request(
+                        method=HttpMethods.GET,
+                        endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
+                        url_params={
+                            "alias": self.alias,
+                            "versionId": version or "latest",
+                        },
+                    )
+
                 response = PromptHttpResponse(
                     id=data["id"],
+                    version=data.get("version", None),
+                    label=data.get("label", None),
                     text=data.get("text", None),
                     messages=data.get("messages", None),
                     type=data["type"],
                     interpolation_type=data["interpolationType"],
                 )
+
+                # Update the cache with fresh data from server
                 self._write_to_cache(
-                    version=version,
+                    cache_key=CACHE_KEY,
+                    version=response.version,
+                    label=response.label,
                     text_template=response.text,
                     messages_template=response.messages,
                     prompt_version_id=response.id,
                     type=response.type,
                     interpolation_type=response.interpolation_type,
                 )
-
+
+                # Update in-memory properties with fresh data (thread-safe)
+                with self._lock:
+                    self.version = response.version
+                    self.label = response.label
+                    self._text_template = response.text
+                    self._messages_template = response.messages
+                    self._prompt_version_id = response.id
+                    self._type = response.type
+                    self._interpolation_type = response.interpolation_type
+
             except Exception:
                 pass

+    def _stop_polling(self):
+        loop = _polling_loop
+        if not loop or not loop.is_running():
+            return
+
+        # Stop all polling tasks
+        for ck in list(self._polling_tasks.keys()):
+            for cv in list(self._polling_tasks[ck].keys()):
+                task = self._polling_tasks[ck][cv]
+                if task and not task.done():
+                    loop.call_soon_threadsafe(task.cancel)
+            self._polling_tasks[ck].clear()
+            self._refresh_map[ck].clear()
+        return

deepeval/tracing/otel/utils.py
CHANGED

@@ -383,53 +383,70 @@ def post_test_run(traces: List[Trace], test_run_id: Optional[str]):
     # return test_run_manager.post_test_run(test_run) TODO: add after test run with metric collection is implemented


+def _normalize_pydantic_ai_messages(span: ReadableSpan) -> Optional[list]:
+    try:
+        raw = span.attributes.get("pydantic_ai.all_messages")
+        if not raw:
+            return None
+
+        messages = raw
+        if isinstance(messages, str):
+            messages = json.loads(messages)
+        elif isinstance(messages, tuple):
+            messages = list(messages)
+
+        if isinstance(messages, list):
+            normalized = []
+            for m in messages:
+                if isinstance(m, str):
+                    try:
+                        m = json.loads(m)
+                    except Exception:
+                        pass
+                normalized.append(m)
+            return normalized
+    except Exception:
+        pass
+
+    return None
+
+
 def check_pydantic_ai_agent_input_output(
     span: ReadableSpan,
 ) -> Tuple[Optional[Any], Optional[Any]]:
     input_val: Optional[Any] = None
     output_val: Optional[Any] = None

+    # Get normalized messages once
+    normalized = _normalize_pydantic_ai_messages(span)
+
     # Input (pydantic_ai.all_messages) - slice up to and including the first 'user' message
-    try:
-        first_user_idx = None
-        for i, m in enumerate(normalized):
-            role = None
-            if isinstance(m, dict):
-                role = m.get("role") or m.get("author")
-            if role == "user":
-                first_user_idx = i
-                break
-
-        input_val = (
-            normalized
-            if first_user_idx is None
-            else normalized[: first_user_idx + 1]
-        )
-    except Exception:
-        pass
+    if normalized:
+        try:
+            first_user_idx = None
+            for i, m in enumerate(normalized):
+                role = None
+                if isinstance(m, dict):
+                    role = m.get("role") or m.get("author")
+                if role == "user":
+                    first_user_idx = i
+                    break
+
+            input_val = (
+                normalized
+                if first_user_idx is None
+                else normalized[: first_user_idx + 1]
+            )
+        except Exception:
+            pass

     # Output (agent final_result)
     try:
         if span.attributes.get("confident.span.type") == "agent":
             output_val = span.attributes.get("final_result")
+            if not output_val and normalized:
+                # Extract the last message if no final_result is available
+                output_val = normalized[-1]
     except Exception:
         pass

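Note: the factored-out helper accepts the `pydantic_ai.all_messages` attribute as a JSON string, a tuple, or a list whose items may themselves be JSON strings, and coerces each shape toward a list of dicts. An illustration of the input shapes it tolerates (values are made up):

    import json

    # A single JSON-encoded conversation...
    as_string = json.dumps([{"role": "user", "content": "hi"}])
    # ...or a tuple of per-message JSON strings, as OTel attribute sequences
    # are often surfaced:
    as_tuple = (json.dumps({"role": "user", "content": "hi"}),)
    # Either shape normalizes to: [{"role": "user", "content": "hi"}]
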
{deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
 deepeval/__init__.py,sha256=6fsb813LD_jNhqR-xZnSdE5E-KsBbC3tc4oIg5ZMgTw,2115
-deepeval/_version.py,sha256=
+deepeval/_version.py,sha256=7aJWTxY4XnqpfnHnpzOHDXIjM0FFMGZTYkyt2xqUalQ,27
 deepeval/annotation/__init__.py,sha256=ZFhUVNNuH_YgQSZJ-m5E9iUb9TkAkEV33a6ouMDZ8EI,111
 deepeval/annotation/annotation.py,sha256=3j3-syeJepAcEj3u3e4T_BeRDzNr7yXGDIoNQGMKpwQ,2298
 deepeval/annotation/api.py,sha256=EYN33ACVzVxsFleRYm60KB4Exvff3rPJKt1VBuuX970,2147
@@ -138,7 +138,7 @@ deepeval/cli/test.py,sha256=kSIFMRTAfVzBJ4OitwvT829-ylV7UzPMP57P2DePS-Q,5482
 deepeval/cli/types.py,sha256=_7KdthstHNc-JKCWrfpDQCf_j8h9PMxh0qJCHmVXJr0,310
 deepeval/cli/utils.py,sha256=F4-yuONzk4ojDoSLjI9RYERB7HOD412iZ2lNlSCq4wk,5601
 deepeval/confident/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
-deepeval/confident/api.py,sha256=
+deepeval/confident/api.py,sha256=2ZhrQOtfxcnQSyY6OxrjY17y1yn-NB7pfIiJa20B1Pk,8519
 deepeval/confident/types.py,sha256=-slFhDof_1maMgpLxqDRZv6kz6ZVY2hP_0uj_aveJKU,533
 deepeval/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deepeval/config/settings.py,sha256=gRRi6nXEUKse13xAShU9MA18zo14vpIgl_R0xJ_0vnM,21314
@@ -240,7 +240,7 @@ deepeval/metrics/faithfulness/faithfulness.py,sha256=bYVhHI7Tr7xH0x-7F2LijxRuCCE
 deepeval/metrics/faithfulness/schema.py,sha256=2dU9dwwmqpGJcWvY2webERWIfH_tn02xgLghHkAY_eM,437
 deepeval/metrics/faithfulness/template.py,sha256=RuZ0LFm4BjZ8lhVrKPgU3ecHszwkF0fe5-BxAkaP5AA,5839
 deepeval/metrics/g_eval/__init__.py,sha256=HAhsQFVq9LIpZXPN00Jc_WrMXrh47NIT86VnUpWM4_4,102
-deepeval/metrics/g_eval/g_eval.py,sha256=
+deepeval/metrics/g_eval/g_eval.py,sha256=CaW7VHPW-SyXt18IE1rSatgagY238s3It-j6SLRI4H4,14395
 deepeval/metrics/g_eval/schema.py,sha256=V629txuDrr_2IEKEsgJVYYZb_pkdfcltQV9ZjvxK5co,287
 deepeval/metrics/g_eval/template.py,sha256=mHj4-mr_HQwbCjpHg7lM_6UesoSatL3g8UGGQAOdT0U,4509
 deepeval/metrics/g_eval/utils.py,sha256=uUT86jRXVYvLDzcnZvvfWssDyGoBHb66nWcJSg4i1u4,8784
@@ -348,7 +348,7 @@ deepeval/metrics/task_completion/schema.py,sha256=JfnZkbCh7skWvrESy65GEo6Rvo0FDJ
 deepeval/metrics/task_completion/task_completion.py,sha256=RKFkXCVOhO70I8A16zv5BCaV3QVKldNxawJ0T93U_Zc,8978
 deepeval/metrics/task_completion/template.py,sha256=4xjTBcGrPQxInbf8iwJOZyok9SQex1aCkbxKmfkXoA4,10437
 deepeval/metrics/tool_correctness/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deepeval/metrics/tool_correctness/tool_correctness.py,sha256=
+deepeval/metrics/tool_correctness/tool_correctness.py,sha256=j5wB9mJp7BLbn3bTZd6LlIeub1kXxXGaDVWrzyvBFo4,12111
 deepeval/metrics/toxicity/__init__.py,sha256=1lgt8BKxfBDd7bfSLu_5kMzmsr9b2_ahPK9oq5zLkMk,39
 deepeval/metrics/toxicity/schema.py,sha256=7uUdzXqTvIIz5nfahlllo_fzVRXg7UeMeXn7Hl32pKY,459
 deepeval/metrics/toxicity/template.py,sha256=zl4y4Tg9gXkxKJ8aXVwj0cJ94pvfYuP7MTeV3dvB5yQ,5045
@@ -370,7 +370,7 @@ deepeval/models/embedding_models/ollama_embedding_model.py,sha256=w3etdIdWvYfVIE
 deepeval/models/embedding_models/openai_embedding_model.py,sha256=Z1--e3CnNNmwryqmUMxBCaTURjtgKWHqADuUeCqFlSc,3545
 deepeval/models/hallucination_model.py,sha256=ABi978VKLE_jNHbDzM96kJ08EsZ5ZlvOlJHA_ptSkfQ,1003
 deepeval/models/llms/__init__.py,sha256=qmvv7wnmTDvys2uUTwQRo-_3DlFV3fGLiewPeQYRsAI,670
-deepeval/models/llms/amazon_bedrock_model.py,sha256=
+deepeval/models/llms/amazon_bedrock_model.py,sha256=3yiUUGU_d_YK7Usq8v5iqG3yHa5VnqeDOoCLG_p8rtc,5185
 deepeval/models/llms/anthropic_model.py,sha256=5gYRNkYUD7Zl3U0SibBG2YGCQsD6DdTsaBhqdaJlKIw,6072
 deepeval/models/llms/azure_model.py,sha256=dqINcfoJNqdd9zh5iTPwQ_ToGMOF7iH6YUB-UWRSOlc,10730
 deepeval/models/llms/deepseek_model.py,sha256=EqBJkKa7rXppCmlnIt_D-Z_r9fbsOUsOAVvN2jWA-Hk,6404
@@ -380,8 +380,8 @@ deepeval/models/llms/kimi_model.py,sha256=ldTefdSVitZYJJQ-_ZsP87iiT5iZ4QCVdfi-Yz
 deepeval/models/llms/litellm_model.py,sha256=iu4-_JCpd9LdEa-eCWseD2iLTA-r7OSgYGWQ0IxB4eA,11527
 deepeval/models/llms/local_model.py,sha256=hEyKVA6pkQm9dICUKsMNgjVI3w6gnyMdmBt_EylkWDk,4473
 deepeval/models/llms/ollama_model.py,sha256=xPO4d4jMY-cQAyHAcMuFvWS8JMWwCUbKP9CMi838Nuc,3307
-deepeval/models/llms/openai_model.py,sha256=
-deepeval/models/llms/utils.py,sha256=
+deepeval/models/llms/openai_model.py,sha256=mUvQ8a9FVk4lrdZyS_QRZTK4imufyaCNjZFPeqbc0AM,17167
+deepeval/models/llms/utils.py,sha256=gFM_8eIvdSwN_D4Yqp-j7PkfoiRn_bgu7tlCHol3A6c,1324
 deepeval/models/mlllms/__init__.py,sha256=19nN6kUB5XI0nUWUQX0aD9GBUMM8WWGvsDgKjuT4EF4,144
 deepeval/models/mlllms/gemini_model.py,sha256=7tHIWD4w_fBz3L7jkKWygn1QpBPk9nl2Kw-yb0Jc3PI,10167
 deepeval/models/mlllms/ollama_model.py,sha256=_YtYtw8oIMVVI-CFsDicsdeEJUPhw_9ArPxB_1olsJA,4798
@@ -404,8 +404,8 @@ deepeval/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 deepeval/plugins/plugin.py,sha256=_dwsdx4Dg9DbXxK3f7zJY4QWTJQWc7QE1HmIg2Zjjag,1515
 deepeval/progress_context.py,sha256=ZSKpxrE9sdgt9G3REKnVeXAv7GJXHHVGgLynpG1Pudw,3557
 deepeval/prompt/__init__.py,sha256=M99QTWdxOfiNeySGCSqN873Q80PPxqRvjLq4_Mw-X1w,49
-deepeval/prompt/api.py,sha256=
-deepeval/prompt/prompt.py,sha256=
+deepeval/prompt/api.py,sha256=665mLKiq8irXWV8kM9P_qFJipdCYZUNQFwW8AkA3itM,1777
+deepeval/prompt/prompt.py,sha256=JjPm7rB-3rnTs8oEQT4EEwnqQqE8ZFNXebVngEOWsI4,23537
 deepeval/prompt/utils.py,sha256=Ermw9P-1-T5wQ5uYuj5yWgdj7pVB_JLw8D37Qvmh9ok,1938
 deepeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deepeval/red_teaming/README.md,sha256=BY5rAdpp3-sMMToEKwq0Nsd9ivkGDzPE16DeDb8GY7U,154
@@ -454,15 +454,15 @@ deepeval/tracing/offline_evals/thread.py,sha256=bcSGFcZJKnszArOLIlWvnCyt0zSmsd7X
 deepeval/tracing/offline_evals/trace.py,sha256=vTflaTKysKRiYvKA-Nx6PUJ3J6NrRLXiIdWieVcm90E,1868
 deepeval/tracing/otel/__init__.py,sha256=HQsaF5yLPwyW5qg8AOV81_nG_7pFHnatOTHi9Wx3HEk,88
 deepeval/tracing/otel/exporter.py,sha256=wPO1ITKpjueLOSNLO6nD2QL9LAd8Xcu6en8hRkB61Wo,28891
-deepeval/tracing/otel/utils.py,sha256=
+deepeval/tracing/otel/utils.py,sha256=yAXyPvTjax2HdLcvbVv9pyOVW4S7elIp3RLGuBTr_8o,15113
 deepeval/tracing/patchers.py,sha256=DAPNkhrDtoeyJIVeQDUMhTz-xGcXu00eqjQZmov8FiU,3096
 deepeval/tracing/perf_epoch_bridge.py,sha256=iyAPddB6Op7NpMtPHJ29lDm53Btz9yLaN6xSCfTRQm4,1825
 deepeval/tracing/tracing.py,sha256=xZEyuxdGY259nQaDkGp_qO7Avriv8hrf4L15ZfeMNV8,42728
 deepeval/tracing/types.py,sha256=l_utWKerNlE5H3mOKpeUJLsvpP3cMyjH7HRANNgTmSQ,5306
 deepeval/tracing/utils.py,sha256=SLnks8apGlrV6uVnvFVl2mWYABEkvXbPXnQvq3KaU_o,7943
 deepeval/utils.py,sha256=-_o3W892u7naX4Y7a8if4mP0Rtkgtapg6Krr1ZBpj0o,17197
-deepeval-3.6.2.dist-info/LICENSE.md,sha256=
-deepeval-3.6.2.dist-info/METADATA,sha256=
-deepeval-3.6.2.dist-info/WHEEL,sha256=
-deepeval-3.6.2.dist-info/entry_points.txt,sha256=
-deepeval-3.6.2.dist-info/RECORD,,
+deepeval-3.6.4.dist-info/LICENSE.md,sha256=0ATkuLv6QgsJTBODUHC5Rak_PArA6gv2t7inJzNTP38,11352
+deepeval-3.6.4.dist-info/METADATA,sha256=oZQnVgn7bI4TUmgA7W_fsoflHL4RuT23O7oBkoo5XcM,18754
+deepeval-3.6.4.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+deepeval-3.6.4.dist-info/entry_points.txt,sha256=fVr8UphXTfJe9I2rObmUtfU3gkSrYeM0pLy-NbJYg10,94
+deepeval-3.6.4.dist-info/RECORD,,

{deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/LICENSE.md
File without changes

{deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/WHEEL
File without changes

{deepeval-3.6.2.dist-info → deepeval-3.6.4.dist-info}/entry_points.txt
File without changes