deepeval 3.6.7__py3-none-any.whl → 3.6.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +104 -36
- deepeval/config/utils.py +5 -0
- deepeval/dataset/dataset.py +162 -30
- deepeval/dataset/utils.py +41 -13
- deepeval/errors.py +20 -2
- deepeval/evaluate/execute.py +1662 -688
- deepeval/evaluate/types.py +1 -0
- deepeval/evaluate/utils.py +13 -3
- deepeval/integrations/crewai/__init__.py +2 -1
- deepeval/integrations/crewai/tool.py +71 -0
- deepeval/integrations/llama_index/__init__.py +0 -4
- deepeval/integrations/llama_index/handler.py +20 -21
- deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
- deepeval/metrics/__init__.py +13 -0
- deepeval/metrics/base_metric.py +1 -0
- deepeval/metrics/contextual_precision/contextual_precision.py +27 -21
- deepeval/metrics/conversational_g_eval/__init__.py +3 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +11 -7
- deepeval/metrics/dag/schema.py +1 -1
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/goal_accuracy/__init__.py +1 -0
- deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
- deepeval/metrics/goal_accuracy/schema.py +17 -0
- deepeval/metrics/goal_accuracy/template.py +235 -0
- deepeval/metrics/hallucination/hallucination.py +8 -8
- deepeval/metrics/indicator.py +21 -1
- deepeval/metrics/mcp/mcp_task_completion.py +7 -2
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +16 -6
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +32 -24
- deepeval/metrics/plan_adherence/__init__.py +1 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
- deepeval/metrics/plan_adherence/schema.py +11 -0
- deepeval/metrics/plan_adherence/template.py +170 -0
- deepeval/metrics/plan_quality/__init__.py +1 -0
- deepeval/metrics/plan_quality/plan_quality.py +292 -0
- deepeval/metrics/plan_quality/schema.py +11 -0
- deepeval/metrics/plan_quality/template.py +101 -0
- deepeval/metrics/step_efficiency/__init__.py +1 -0
- deepeval/metrics/step_efficiency/schema.py +11 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
- deepeval/metrics/step_efficiency/template.py +256 -0
- deepeval/metrics/task_completion/task_completion.py +1 -0
- deepeval/metrics/tool_correctness/schema.py +6 -0
- deepeval/metrics/tool_correctness/template.py +88 -0
- deepeval/metrics/tool_correctness/tool_correctness.py +226 -22
- deepeval/metrics/tool_use/__init__.py +1 -0
- deepeval/metrics/tool_use/schema.py +19 -0
- deepeval/metrics/tool_use/template.py +220 -0
- deepeval/metrics/tool_use/tool_use.py +458 -0
- deepeval/metrics/topic_adherence/__init__.py +1 -0
- deepeval/metrics/topic_adherence/schema.py +16 -0
- deepeval/metrics/topic_adherence/template.py +162 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
- deepeval/models/embedding_models/azure_embedding_model.py +37 -36
- deepeval/models/embedding_models/local_embedding_model.py +30 -32
- deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
- deepeval/models/embedding_models/openai_embedding_model.py +22 -31
- deepeval/models/llms/amazon_bedrock_model.py +20 -17
- deepeval/models/llms/openai_model.py +10 -1
- deepeval/models/retry_policy.py +103 -20
- deepeval/openai/extractors.py +61 -16
- deepeval/openai/patch.py +8 -12
- deepeval/openai/types.py +1 -1
- deepeval/openai/utils.py +108 -1
- deepeval/prompt/prompt.py +1 -0
- deepeval/prompt/utils.py +43 -14
- deepeval/simulator/conversation_simulator.py +25 -18
- deepeval/synthesizer/chunking/context_generator.py +9 -1
- deepeval/synthesizer/synthesizer.py +11 -10
- deepeval/test_case/llm_test_case.py +6 -2
- deepeval/test_run/test_run.py +190 -207
- deepeval/tracing/__init__.py +2 -1
- deepeval/tracing/otel/exporter.py +3 -4
- deepeval/tracing/otel/utils.py +23 -4
- deepeval/tracing/trace_context.py +53 -38
- deepeval/tracing/tracing.py +23 -0
- deepeval/tracing/types.py +16 -14
- deepeval/utils.py +21 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/METADATA +1 -1
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/RECORD +85 -63
- deepeval/integrations/llama_index/agent/patched.py +0 -68
- deepeval/tracing/message_types/__init__.py +0 -10
- deepeval/tracing/message_types/base.py +0 -6
- deepeval/tracing/message_types/messages.py +0 -14
- deepeval/tracing/message_types/tools.py +0 -18
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/LICENSE.md +0 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/WHEEL +0 -0
- {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/entry_points.txt +0 -0
deepeval/models/embedding_models/local_embedding_model.py
CHANGED

@@ -1,5 +1,5 @@
 from openai import OpenAI, AsyncOpenAI
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
@@ -15,25 +15,32 @@ retry_local = create_retry_decorator(PS.LOCAL)
 
 
 class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self, *args, **kwargs):
-        self.base_url = KEY_FILE_HANDLER.fetch_data(
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        model: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
+    ):
+        self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
+            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
+        )
+        self.base_url = base_url or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
-        model_name = KEY_FILE_HANDLER.fetch_data(
+        self.model_name = model or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
-        self.api_key = KEY_FILE_HANDLER.fetch_data(
-            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
-        )
-        self.kwargs = kwargs
-        super().__init__(model_name)
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(self.model_name)
 
     @retry_local
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.model_name,
-            input=[text],
+            model=self.model_name, input=[text], **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -41,8 +48,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return [data.embedding for data in response.data]
 
@@ -50,8 +56,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.model_name,
-            input=[text],
+            model=self.model_name, input=[text], **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -59,8 +64,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
        )
         return [data.embedding for data in response.data]
 
@@ -76,27 +80,21 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
-    def _client_kwargs(self) -> Dict:
-        """
-        If Tenacity manages retries, turn off OpenAI SDK retries to avoid double retrying.
-        If users opt into SDK retries via DEEPEVAL_SDK_RETRY_PROVIDERS=local, leave them enabled.
-        """
-        kwargs = dict(self.kwargs or {})
+    def _build_client(self, cls):
+        client_kwargs = self.client_kwargs.copy()
         if not sdk_retries_for(PS.LOCAL):
-            kwargs["max_retries"] = 0
-        return kwargs
+            client_kwargs["max_retries"] = 0
 
-    def _build_client(self, cls):
-        kw = dict(
+        client_init_kwargs = dict(
             api_key=self.api_key,
             base_url=self.base_url,
-            **self._client_kwargs(),
+            **client_kwargs,
         )
         try:
-            return cls(**kw)
+            return cls(**client_init_kwargs)
         except TypeError as e:
-            # older OpenAI SDKs may not accept max_retries
+            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
             if "max_retries" in str(e):
-                kw.pop("max_retries", None)
-                return cls(**kw)
+                client_init_kwargs.pop("max_retries", None)
+                return cls(**client_init_kwargs)
             raise
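The net effect of this refactor: credentials, endpoint, model, and per-request options become explicit constructor arguments with the key file as fallback, and any remaining kwargs flow to the OpenAI client constructor. A minimal usage sketch, assuming an OpenAI-compatible local server; the URL, key, and model name below are placeholders, not values from this diff:

from deepeval.models.embedding_models.local_embedding_model import LocalEmbeddingModel

embedder = LocalEmbeddingModel(
    api_key="not-needed",                   # placeholder; falls back to the key file if omitted
    base_url="http://localhost:8080/v1",    # hypothetical OpenAI-compatible endpoint
    model="nomic-embed-text",               # hypothetical model name
    generation_kwargs={"dimensions": 512},  # forwarded into embeddings.create(...); honored only if the server supports it
    timeout=30,                             # extra kwargs go to the OpenAI client constructor
)
vector = embedder.embed_text("hello world")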
deepeval/models/embedding_models/ollama_embedding_model.py
CHANGED

@@ -1,5 +1,5 @@
 from ollama import Client, AsyncClient
-from typing import List
+from typing import List, Optional, Dict
 
 from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
 from deepeval.models import DeepEvalBaseEmbeddingModel
@@ -13,27 +13,28 @@ retry_ollama = create_retry_decorator(PS.OLLAMA)
 
 
 class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
-    def __init__(self, *args, **kwargs):
-        self.base_url = KEY_FILE_HANDLER.fetch_data(
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        host: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
+    ):
+        self.host = host or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
         )
-        model_name = KEY_FILE_HANDLER.fetch_data(
+        self.model_name = model or KEY_FILE_HANDLER.fetch_data(
             EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
         )
-
-        self.api_key = KEY_FILE_HANDLER.fetch_data(
-            EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
-        )
-        self.args = args
-        self.kwargs = kwargs
-        super().__init__(model_name)
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
+        super().__init__(self.model_name)
 
     @retry_ollama
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.model_name,
-            input=text,
+            model=self.model_name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]
 
@@ -41,8 +42,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]
 
@@ -50,8 +50,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.model_name,
-            input=text,
+            model=self.model_name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]
 
@@ -59,8 +58,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.model_name,
-            input=texts,
+            model=self.model_name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]
 
@@ -74,7 +72,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
         return self._build_client(AsyncClient)
 
     def _build_client(self, cls):
-        return cls(host=self.base_url, **self.kwargs)
+        return cls(host=self.host, **self.client_kwargs)
 
     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
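Same pattern for Ollama: `host` and `model` are explicit (still falling back to the LOCAL_EMBEDDING_* key-file values), `generation_kwargs` is forwarded to `embed(...)`, and any other kwargs reach the `Client`/`AsyncClient` constructor. A sketch with placeholder values:

from deepeval.models.embedding_models.ollama_embedding_model import OllamaEmbeddingModel

embedder = OllamaEmbeddingModel(
    model="nomic-embed-text",              # hypothetical Ollama embedding model
    host="http://localhost:11434",         # the default Ollama host, spelled out
    generation_kwargs={"truncate": True},  # forwarded into embed(...)
)
vectors = embedder.embed_texts(["first text", "second text"])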
deepeval/models/embedding_models/openai_embedding_model.py
CHANGED

@@ -19,27 +19,28 @@ default_openai_embedding_model = "text-embedding-3-small"
 
 
 class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
+
     def __init__(
         self,
         model: Optional[str] = None,
-        _openai_api_key: Optional[str] = None,
-        **kwargs,
+        openai_api_key: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **client_kwargs,
     ):
-        model = model if model else default_openai_embedding_model
-        if model not in valid_openai_embedding_models:
+        self.openai_api_key = openai_api_key
+        self.model_name = model if model else default_openai_embedding_model
+        if self.model_name not in valid_openai_embedding_models:
             raise ValueError(
                 f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
             )
-        self._openai_api_key = _openai_api_key
-        self.model_name = model
-        self.kwargs = kwargs
+        self.client_kwargs = client_kwargs or {}
+        self.generation_kwargs = generation_kwargs or {}
 
     @retry_openai
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=text,
-            model=self.model_name,
+            input=text, model=self.model_name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -47,8 +48,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=texts,
-            model=self.model_name,
+            input=texts, model=self.model_name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -56,8 +56,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=text,
-            model=self.model_name,
+            input=text, model=self.model_name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -65,8 +64,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=texts,
-            model=self.model_name,
+            input=texts, model=self.model_name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -82,27 +80,20 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
-    def _client_kwargs(self) -> Dict:
-        """
-        If Tenacity is managing retries, force OpenAI SDK retries off to avoid double retries.
-        If the user opts into SDK retries for 'openai' via DEEPEVAL_SDK_RETRY_PROVIDERS,
-        leave their retry settings as is.
-        """
-        kwargs = dict(self.kwargs or {})
+    def _build_client(self, cls):
+        client_kwargs = self.client_kwargs.copy()
         if not sdk_retries_for(PS.OPENAI):
-            kwargs["max_retries"] = 0
-        return kwargs
+            client_kwargs["max_retries"] = 0
 
-    def _build_client(self, cls):
-        kw = dict(
-            api_key=self._openai_api_key,
-            **self._client_kwargs(),
+        client_init_kwargs = dict(
+            api_key=self.openai_api_key,
+            **client_kwargs,
         )
         try:
-            return cls(**kw)
+            return cls(**client_init_kwargs)
         except TypeError as e:
             # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
             if "max_retries" in str(e):
-                kw.pop("max_retries", None)
-                return cls(**kw)
+                client_init_kwargs.pop("max_retries", None)
+                return cls(**client_init_kwargs)
             raise
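For the hosted OpenAI variant, the constructor now takes `openai_api_key` directly and validates the model name eagerly, so an unsupported name raises `ValueError` at construction time rather than on the first embedding call. A sketch; the key is a placeholder, and when it is omitted the OpenAI client falls back to the `OPENAI_API_KEY` environment variable:

from deepeval.models.embedding_models.openai_embedding_model import OpenAIEmbeddingModel

embedder = OpenAIEmbeddingModel(
    model="text-embedding-3-small",         # must be in valid_openai_embedding_models
    openai_api_key="sk-placeholder",        # placeholder key
    generation_kwargs={"dimensions": 256},  # forwarded into embeddings.create(...)
)
vector = embedder.embed_text("hello world")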
deepeval/models/llms/amazon_bedrock_model.py
CHANGED

@@ -76,23 +76,26 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
-        [previous method body, old lines 79-95, not captured in this view]
+        try:
+            payload = self.get_converse_request_body(prompt)
+            client = await self._ensure_client()
+            response = await client.converse(
+                modelId=self.model_id,
+                messages=payload["messages"],
+                inferenceConfig=payload["inferenceConfig"],
+            )
+            message = response["output"]["message"]["content"][0]["text"]
+            cost = self.calculate_cost(
+                response["usage"]["inputTokens"],
+                response["usage"]["outputTokens"],
+            )
+            if schema is None:
+                return message, cost
+            else:
+                json_output = trim_and_load_json(message)
+                return schema.model_validate(json_output), cost
+        finally:
+            await self.close()
 
     ###############################################
     # Client management
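The rewritten `a_generate` always releases the Bedrock client in a `finally` block and returns a `(result, cost)` tuple: raw text when `schema` is omitted, a validated Pydantic instance otherwise. A consumption sketch; the constructor arguments shown are assumptions for illustration, not the documented signature:

import asyncio
from pydantic import BaseModel
from deepeval.models.llms.amazon_bedrock_model import AmazonBedrockModel

class Verdict(BaseModel):
    verdict: str

async def main():
    model = AmazonBedrockModel(model_id="anthropic.claude-3-haiku-20240307-v1:0")  # hypothetical args
    text, cost = await model.a_generate("Say hi")  # schema=None -> (str, float)
    parsed, cost = await model.a_generate(
        'Answer as JSON with a "verdict" key.', schema=Verdict
    )  # schema given -> (Verdict, float), via trim_and_load_json + model_validate

asyncio.run(main())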
deepeval/models/llms/openai_model.py
CHANGED

@@ -8,6 +8,7 @@ from openai import (
     AsyncOpenAI,
 )
 
+from deepeval.config.settings import get_settings
 from deepeval.constants import ProviderSlug as PS
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
@@ -209,6 +210,11 @@ models_requiring_temperature_1 = [
 ]
 
 
+def _request_timeout_seconds() -> float:
+    timeout = float(get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
+    return timeout if timeout > 0 else 30.0
+
+
 class GPTModel(DeepEvalBaseLLM):
     def __init__(
         self,
@@ -387,7 +393,6 @@ class GPTModel(DeepEvalBaseLLM):
         )
         return schema.model_validate(json_output), cost
 
-        client: AsyncOpenAI
         completion = await client.chat.completions.create(
             model=self.model_name,
             messages=[{"role": "user", "content": prompt}],
@@ -501,9 +506,13 @@ class GPTModel(DeepEvalBaseLLM):
         kwargs = dict(self.kwargs or {})
         if not sdk_retries_for(PS.OPENAI):
             kwargs["max_retries"] = 0
+
+        if not kwargs.get("timeout"):
+            kwargs["timeout"] = _request_timeout_seconds()
         return kwargs
 
     def _build_client(self, cls):
+
         kw = dict(
             api_key=self._openai_api_key,
             base_url=self.base_url,
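The `_client_kwargs` change gives every OpenAI client a request timeout with a simple precedence: an explicit `timeout=` kwarg wins, then a positive `DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS`, then a 30-second default. A standalone sketch of that resolution order; `resolve_timeout` is illustrative, not part of the library:

from typing import Optional

def resolve_timeout(explicit: Optional[float], setting: Optional[float]) -> float:
    # Mirrors the _client_kwargs + _request_timeout_seconds logic above.
    if explicit:  # caller-supplied client kwarg wins
        return explicit
    configured = float(setting or 0)
    return configured if configured > 0 else 30.0  # fall back to 30s

assert resolve_timeout(10.0, 5.0) == 10.0   # explicit kwarg wins
assert resolve_timeout(None, 5.0) == 5.0    # configured setting used next
assert resolve_timeout(None, None) == 30.0  # default when nothing is set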
deepeval/models/retry_policy.py
CHANGED

@@ -39,6 +39,7 @@ import itertools
 import functools
 import threading
 import logging
+import time
 
 from dataclasses import dataclass, field
 from typing import Callable, Iterable, Mapping, Optional, Sequence, Tuple, Union
@@ -52,6 +53,7 @@ from tenacity import (
 )
 from tenacity.stop import stop_base
 from tenacity.wait import wait_base
+from contextvars import ContextVar, copy_context
 
 from deepeval.constants import (
     ProviderSlug as PS,
@@ -65,6 +67,81 @@ Provider = Union[str, PS]
 _MAX_TIMEOUT_THREADS = get_settings().DEEPEVAL_TIMEOUT_THREAD_LIMIT
 _TIMEOUT_SEMA = threading.BoundedSemaphore(_MAX_TIMEOUT_THREADS)
 _WORKER_ID = itertools.count(1)
+_OUTER_DEADLINE = ContextVar("deepeval_outer_deadline", default=None)
+
+
+def set_outer_deadline(seconds: float | None):
+    """Set (or clear) the outer task time budget.
+
+    Stores a deadline in a local context variable so nested code
+    can cooperatively respect a shared budget. Always pair this with
+    `reset_outer_deadline(token)` in a `finally` block.
+
+    Args:
+        seconds: Number of seconds from now to set as the deadline. If `None`,
+            `0`, or a non-positive value is provided, the deadline is cleared.
+
+    Returns:
+        contextvars.Token: The token returned by the underlying ContextVar `.set()`
+            call, which must be passed to `reset_outer_deadline` to restore the
+            previous value.
+    """
+    if seconds and seconds > 0:
+        return _OUTER_DEADLINE.set(time.monotonic() + seconds)
+    return _OUTER_DEADLINE.set(None)
+
+
+def reset_outer_deadline(token):
+    """Restore the previous outer deadline set by `set_outer_deadline`.
+
+    This should be called in a `finally` block to ensure the deadline
+    is restored even if an exception occurs.
+
+    Args:
+        token: The `contextvars.Token` returned by `set_outer_deadline`.
+    """
+    if token is not None:
+        _OUTER_DEADLINE.reset(token)
+
+
+def _remaining_budget() -> float | None:
+    dl = _OUTER_DEADLINE.get()
+    if dl is None:
+        return None
+    return max(0.0, dl - time.monotonic())
+
+
+def _is_budget_spent() -> bool:
+    rem = _remaining_budget()
+    return rem is not None and rem <= 0.0
+
+
+def resolve_effective_attempt_timeout():
+    """Resolve the timeout to use for a single provider attempt.
+
+    Combines the configured per-attempt timeout with any remaining outer budget:
+    - If `DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS` is `0` or `None`, returns `0`;
+      callers should skip `asyncio.wait_for` in this case and rely on the outer cap.
+    - If positive and an outer deadline is present, returns
+      `min(per_attempt, remaining_budget)`.
+    - If positive and no outer deadline is present, returns `per_attempt`.
+
+    Returns:
+        float: Seconds to use for the inner per-attempt timeout. `0` means
+            disable inner timeout and rely on the outer budget instead.
+    """
+    per_attempt = float(
+        get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0
+    )
+    # 0 or None disable the inner wait_for; rely on the outer task cap for timeouts instead.
+    if per_attempt <= 0:
+        return 0
+    # With a positive per-attempt value, use up to the remaining outer budget.
+    rem = _remaining_budget()
+    if rem is not None:
+        return max(0.0, min(per_attempt, rem))
+    return per_attempt
+
 
 # --------------------------
 # Policy description
@@ -399,9 +476,10 @@ def make_after_log(slug: str):
         if not _logger.isEnabledFor(after_level):
             return
 
+        show_trace = bool(get_settings().DEEPEVAL_LOG_STACK_TRACES)
         exc_info = (
             (type(exc), exc, getattr(exc, "__traceback__", None))
-            if get_settings().DEEPEVAL_LOG_STACK_TRACES
+            if show_trace
             else None
         )
 
@@ -416,7 +494,7 @@ def make_after_log(slug: str):
     return _after
 
 
-def _make_timeout_error(timeout_seconds: float) -> TimeoutError:
+def _make_timeout_error(timeout_seconds: float) -> asyncio.TimeoutError:
     settings = get_settings()
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug(
@@ -427,12 +505,12 @@ def _make_timeout_error(timeout_seconds: float) -> TimeoutError:
         )
     msg = (
         f"call timed out after {timeout_seconds:g}s (per attempt). "
-        "Increase DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS (0 disables) or reduce work per attempt."
+        "Increase DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE (None disables) or reduce work per attempt."
     )
-    return TimeoutError(msg)
+    return asyncio.TimeoutError(msg)
 
 
-def _run_sync_with_timeout(func, timeout_seconds, *args, **kwargs):
+def run_sync_with_timeout(func, timeout_seconds, *args, **kwargs):
     """
     Run a synchronous callable with a soft timeout enforced by a helper thread,
     with a global cap on concurrent timeout-workers.
@@ -499,9 +577,11 @@ def _run_sync_with_timeout(func, timeout_seconds, *args, **kwargs):
     done = threading.Event()
     result = {"value": None, "exc": None}
 
+    context = copy_context()
+
     def target():
         try:
-            result["value"] = func(*args, **kwargs)
+            result["value"] = context.run(func, *args, **kwargs)
         except BaseException as e:
             result["exc"] = e
         finally:
@@ -562,37 +642,40 @@ def create_retry_decorator(provider: Provider):
 
         @functools.wraps(func)
         async def attempt(*args, **kwargs):
-            [3 removed lines not captured in this view]
+            if _is_budget_spent():
+                raise _make_timeout_error(0)
+
+            per_attempt_timeout = resolve_effective_attempt_timeout()
+
             coro = func(*args, **kwargs)
-            if …
+            if per_attempt_timeout > 0:
                 try:
-                    return await asyncio.wait_for(coro, …)
-                except asyncio.TimeoutError as e:
+                    return await asyncio.wait_for(coro, per_attempt_timeout)
+                except (asyncio.TimeoutError, TimeoutError) as e:
                     if (
                         logger.isEnabledFor(logging.DEBUG)
                         and get_settings().DEEPEVAL_VERBOSE_MODE is True
                     ):
                         logger.debug(
                             "async timeout after %.3fs (active_threads=%d, tasks=%d)",
-                            …,
+                            per_attempt_timeout,
                             threading.active_count(),
                             len(asyncio.all_tasks()),
                         )
-                    raise _make_timeout_error(…)
+                    raise _make_timeout_error(per_attempt_timeout) from e
             return await coro
 
         return base_retry(attempt)
 
         @functools.wraps(func)
        def attempt(*args, **kwargs):
-            [6 removed lines not captured in this view]
+            if _is_budget_spent():
+                raise _make_timeout_error(0)
+
+            per_attempt_timeout = resolve_effective_attempt_timeout()
+            if per_attempt_timeout > 0:
+                return run_sync_with_timeout(
+                    func, per_attempt_timeout, *args, **kwargs
                 )
             return func(*args, **kwargs)
 
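Taken together, these additions form a cooperative budget protocol: `set_outer_deadline` stores a monotonic deadline in a `ContextVar`, each retry attempt clamps its per-attempt timeout to the remaining budget (or refuses to start once the budget is spent), and `copy_context()` is what lets the sync path's helper thread see the caller's deadline. A self-contained sketch of the same pattern, using standalone names rather than the library's private ones:

import threading
import time
from contextvars import ContextVar, copy_context

_DEADLINE: ContextVar = ContextVar("deadline", default=None)

def remaining() -> float | None:
    dl = _DEADLINE.get()
    return None if dl is None else max(0.0, dl - time.monotonic())

def effective_timeout(per_attempt: float) -> float:
    # Clamp the per-attempt timeout to whatever outer budget is left.
    rem = remaining()
    return per_attempt if rem is None else max(0.0, min(per_attempt, rem))

token = _DEADLINE.set(time.monotonic() + 10.0)  # open a 10s budget
try:
    print(effective_timeout(30.0))  # ~10.0: clamped by the outer budget
    # A worker thread only sees the deadline if it runs in a copied context:
    ctx = copy_context()
    t = threading.Thread(target=lambda: ctx.run(lambda: print(remaining())))
    t.start()
    t.join()
finally:
    _DEADLINE.reset(token)  # always restore, as the docstrings require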