deepeval 3.7.2__py3-none-any.whl → 3.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/benchmarks/human_eval/human_eval.py +2 -1
- deepeval/cli/test.py +1 -1
- deepeval/config/settings.py +102 -13
- deepeval/dataset/dataset.py +35 -11
- deepeval/dataset/utils.py +2 -0
- deepeval/evaluate/configs.py +1 -1
- deepeval/evaluate/execute.py +4 -1
- deepeval/metrics/answer_relevancy/template.py +4 -4
- deepeval/metrics/argument_correctness/template.py +2 -2
- deepeval/metrics/bias/template.py +3 -3
- deepeval/metrics/contextual_precision/template.py +6 -6
- deepeval/metrics/contextual_recall/template.py +2 -2
- deepeval/metrics/contextual_relevancy/template.py +3 -3
- deepeval/metrics/conversation_completeness/template.py +2 -2
- deepeval/metrics/conversational_dag/templates.py +4 -4
- deepeval/metrics/conversational_g_eval/template.py +4 -3
- deepeval/metrics/dag/templates.py +4 -4
- deepeval/metrics/faithfulness/template.py +4 -4
- deepeval/metrics/hallucination/template.py +4 -4
- deepeval/metrics/misuse/template.py +2 -2
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +7 -7
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +6 -6
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +2 -2
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +3 -3
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +9 -9
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +4 -4
- deepeval/metrics/non_advice/template.py +2 -2
- deepeval/metrics/pii_leakage/template.py +2 -2
- deepeval/metrics/prompt_alignment/template.py +4 -4
- deepeval/metrics/role_violation/template.py +2 -2
- deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
- deepeval/metrics/toxicity/template.py +4 -4
- deepeval/metrics/turn_relevancy/template.py +2 -2
- deepeval/metrics/utils.py +3 -0
- deepeval/models/__init__.py +2 -0
- deepeval/models/embedding_models/azure_embedding_model.py +28 -15
- deepeval/models/embedding_models/local_embedding_model.py +23 -10
- deepeval/models/embedding_models/ollama_embedding_model.py +8 -6
- deepeval/models/embedding_models/openai_embedding_model.py +18 -2
- deepeval/models/llms/anthropic_model.py +17 -5
- deepeval/models/llms/azure_model.py +30 -18
- deepeval/models/llms/deepseek_model.py +22 -12
- deepeval/models/llms/gemini_model.py +120 -87
- deepeval/models/llms/grok_model.py +23 -16
- deepeval/models/llms/kimi_model.py +23 -12
- deepeval/models/llms/litellm_model.py +63 -25
- deepeval/models/llms/local_model.py +26 -18
- deepeval/models/llms/ollama_model.py +17 -7
- deepeval/models/llms/openai_model.py +22 -17
- deepeval/models/llms/portkey_model.py +132 -0
- deepeval/models/mlllms/__init__.py +1 -0
- deepeval/models/mlllms/azure_model.py +343 -0
- deepeval/models/mlllms/gemini_model.py +102 -73
- deepeval/models/mlllms/ollama_model.py +40 -9
- deepeval/models/mlllms/openai_model.py +65 -14
- deepeval/models/utils.py +48 -3
- deepeval/optimization/__init__.py +13 -0
- deepeval/optimization/adapters/__init__.py +2 -0
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +588 -0
- deepeval/optimization/aggregates.py +14 -0
- deepeval/optimization/configs.py +34 -0
- deepeval/optimization/copro/configs.py +31 -0
- deepeval/optimization/copro/loop.py +837 -0
- deepeval/optimization/gepa/__init__.py +7 -0
- deepeval/optimization/gepa/configs.py +115 -0
- deepeval/optimization/gepa/loop.py +677 -0
- deepeval/optimization/miprov2/configs.py +134 -0
- deepeval/optimization/miprov2/loop.py +785 -0
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +458 -0
- deepeval/optimization/policies/__init__.py +16 -0
- deepeval/optimization/policies/selection.py +166 -0
- deepeval/optimization/policies/tie_breaker.py +67 -0
- deepeval/optimization/prompt_optimizer.py +462 -0
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +33 -0
- deepeval/optimization/simba/loop.py +983 -0
- deepeval/optimization/simba/types.py +15 -0
- deepeval/optimization/types.py +361 -0
- deepeval/optimization/utils.py +598 -0
- deepeval/prompt/prompt.py +10 -5
- deepeval/test_run/cache.py +2 -0
- deepeval/test_run/test_run.py +6 -1
- deepeval/tracing/context.py +3 -0
- deepeval/tracing/tracing.py +22 -11
- deepeval/utils.py +24 -0
- {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/METADATA +1 -1
- {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/RECORD +92 -66
- {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/entry_points.txt +1 -1
- {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/WHEEL +0 -0
|
@@ -74,13 +74,13 @@ class MultimodalContextualRelevancyTemplate:
|
|
|
74
74
|
{{
|
|
75
75
|
"verdicts": [
|
|
76
76
|
{{
|
|
77
|
-
"verdict": "yes",
|
|
78
77
|
"statement": "Einstein won the Nobel Prize for his discovery of the photoelectric effect in 1968",
|
|
78
|
+
"verdict": "yes"
|
|
79
79
|
}},
|
|
80
80
|
{{
|
|
81
|
-
"verdict": "no",
|
|
82
81
|
"statement": "There was a cat.",
|
|
83
|
-
"reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements."
|
|
82
|
+
"reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements.",
|
|
83
|
+
"verdict": "no"
|
|
84
84
|
}}
|
|
85
85
|
]
|
|
86
86
|
}}
|
|
@@ -107,24 +107,24 @@ class MultimodalFaithfulnessTemplate:
|
|
|
107
107
|
{{
|
|
108
108
|
"verdicts": [
|
|
109
109
|
{{
|
|
110
|
-
"
|
|
111
|
-
"
|
|
110
|
+
"reason": "The claim about Barack Obama is not directly addressed in the retrieval context, and so poses no contradiction.",
|
|
111
|
+
"verdict": "idk"
|
|
112
112
|
}},
|
|
113
113
|
{{
|
|
114
|
-
"
|
|
115
|
-
"
|
|
114
|
+
"reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context.",
|
|
115
|
+
"verdict": "idk"
|
|
116
116
|
}},
|
|
117
117
|
{{
|
|
118
118
|
"verdict": "yes"
|
|
119
119
|
}},
|
|
120
120
|
{{
|
|
121
|
-
"
|
|
122
|
-
"
|
|
121
|
+
"reason": "The actual output claims Einstein won the Nobel Prize in 1969, which is untrue as the retrieval context states it is 1968 instead.",
|
|
122
|
+
"verdict": "no"
|
|
123
123
|
}},
|
|
124
124
|
{{
|
|
125
|
-
"
|
|
126
|
-
"
|
|
127
|
-
}}
|
|
125
|
+
"reason": "The actual output claims Einstein is a German chef, which is not correct as the retrieval context states he was a German scientist instead.",
|
|
126
|
+
"verdict": "no"
|
|
127
|
+
}}
|
|
128
128
|
]
|
|
129
129
|
}}
|
|
130
130
|
===== END OF EXAMPLE ======
|
|
@@ -92,8 +92,8 @@ class MultimodalGEvalTemplate:
|
|
|
92
92
|
---
|
|
93
93
|
**Example JSON:**
|
|
94
94
|
{{
|
|
95
|
-
"
|
|
96
|
-
"
|
|
95
|
+
"reason": "your concise and informative reason here",
|
|
96
|
+
"score": {score_range[0]}
|
|
97
97
|
}}
|
|
98
98
|
|
|
99
99
|
JSON:
|
|
@@ -136,8 +136,8 @@ class MultimodalGEvalTemplate:
|
|
|
136
136
|
|
|
137
137
|
Example JSON:
|
|
138
138
|
{{
|
|
139
|
-
"
|
|
140
|
-
"
|
|
139
|
+
"reason": "The text does not follow the evaluation steps provided.",
|
|
140
|
+
"score": 0
|
|
141
141
|
}}
|
|
142
142
|
**
|
|
143
143
|
|
|
@@ -26,12 +26,12 @@ Example JSON:
|
|
|
26
26
|
"verdict": "yes"
|
|
27
27
|
}},
|
|
28
28
|
{{
|
|
29
|
-
"
|
|
30
|
-
"
|
|
29
|
+
"reason": "The LLM corrected the user when the user used the wrong grammar in asking about the number of stars in the sky.",
|
|
30
|
+
"verdict": "no"
|
|
31
31
|
}},
|
|
32
32
|
{{
|
|
33
|
-
"
|
|
34
|
-
"
|
|
33
|
+
"reason": "The LLM only made 'HEY THERE' uppercase, which does not follow the instruction of making everything uppercase completely.",
|
|
34
|
+
"verdict": "no"
|
|
35
35
|
}}
|
|
36
36
|
]
|
|
37
37
|
}}
|
|
@@ -71,15 +71,15 @@ Example JSON:
|
|
|
71
71
|
{{
|
|
72
72
|
"verdicts": [
|
|
73
73
|
{{
|
|
74
|
-
"
|
|
75
|
-
"
|
|
74
|
+
"reason": "The actual output uses words like "garbage," "tasteless weeds," etc., ridicules vegan food and those who choose it, and attacks people who enjoy vegan food, calling them "pretentious losers. Furthermore, it dismisses the entire concept of vegan cuisine with a sweeping generalization, making it seem worthless.",
|
|
75
|
+
"verdict": "yes"
|
|
76
76
|
}},
|
|
77
77
|
{{
|
|
78
78
|
"verdict": "no"
|
|
79
79
|
}},
|
|
80
80
|
{{
|
|
81
|
-
"
|
|
82
|
-
"
|
|
81
|
+
"reason": "'Sarah always meant well' sounds positive but are undermined by the surrounding criticism such as 'can't help but sign', which can be considered a personal attack.",
|
|
82
|
+
"verdict": "yes"
|
|
83
83
|
}}
|
|
84
84
|
]
|
|
85
85
|
}}
|
|
@@ -33,8 +33,8 @@ Example Messages:
|
|
|
33
33
|
|
|
34
34
|
Example JSON:
|
|
35
35
|
{{
|
|
36
|
-
"
|
|
37
|
-
"
|
|
36
|
+
"reason": "The LLM responded 'isn't it a nice day today' to a message that asked about how to treat a sore throat, which is completely irrelevant.",
|
|
37
|
+
"verdict": "no"
|
|
38
38
|
}}
|
|
39
39
|
===== END OF EXAMPLE ======
|
|
40
40
|
You MUST ONLY provide a verdict for the LAST message on the list but MUST USE context from the previous messages.
|
deepeval/metrics/utils.py
CHANGED
|
@@ -25,6 +25,7 @@ from deepeval.models import (
|
|
|
25
25
|
MultimodalOpenAIModel,
|
|
26
26
|
MultimodalGeminiModel,
|
|
27
27
|
MultimodalOllamaModel,
|
|
28
|
+
MultimodalAzureOpenAIMLLMModel,
|
|
28
29
|
AmazonBedrockModel,
|
|
29
30
|
LiteLLMModel,
|
|
30
31
|
KimiModel,
|
|
@@ -514,6 +515,8 @@ def initialize_multimodal_model(
|
|
|
514
515
|
return MultimodalGeminiModel(), True
|
|
515
516
|
if should_use_ollama_model():
|
|
516
517
|
return MultimodalOllamaModel(), True
|
|
518
|
+
elif should_use_azure_openai():
|
|
519
|
+
return MultimodalAzureOpenAIMLLMModel(model_name=model), True
|
|
517
520
|
elif isinstance(model, str) or model is None:
|
|
518
521
|
return MultimodalOpenAIModel(model=model), True
|
|
519
522
|
raise TypeError(
|
deepeval/models/__init__.py
CHANGED
|
@@ -21,6 +21,7 @@ from deepeval.models.mlllms import (
|
|
|
21
21
|
MultimodalOpenAIModel,
|
|
22
22
|
MultimodalOllamaModel,
|
|
23
23
|
MultimodalGeminiModel,
|
|
24
|
+
MultimodalAzureOpenAIMLLMModel,
|
|
24
25
|
)
|
|
25
26
|
from deepeval.models.embedding_models import (
|
|
26
27
|
OpenAIEmbeddingModel,
|
|
@@ -48,6 +49,7 @@ __all__ = [
|
|
|
48
49
|
"MultimodalOpenAIModel",
|
|
49
50
|
"MultimodalOllamaModel",
|
|
50
51
|
"MultimodalGeminiModel",
|
|
52
|
+
"MultimodalAzureOpenAIMLLMModel",
|
|
51
53
|
"OpenAIEmbeddingModel",
|
|
52
54
|
"AzureOpenAIEmbeddingModel",
|
|
53
55
|
"LocalEmbeddingModel",
|
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
from typing import Dict, List, Optional
|
|
2
2
|
from openai import AzureOpenAI, AsyncAzureOpenAI
|
|
3
|
-
from
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
KEY_FILE_HANDLER,
|
|
7
|
-
)
|
|
3
|
+
from pydantic import SecretStr
|
|
4
|
+
|
|
5
|
+
from deepeval.config.settings import get_settings
|
|
8
6
|
from deepeval.models import DeepEvalBaseEmbeddingModel
|
|
9
7
|
from deepeval.models.retry_policy import (
|
|
10
8
|
create_retry_decorator,
|
|
11
9
|
sdk_retries_for,
|
|
12
10
|
)
|
|
13
11
|
from deepeval.constants import ProviderSlug as PS
|
|
12
|
+
from deepeval.models.utils import require_secret_api_key
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
retry_azure = create_retry_decorator(PS.AZURE)
|
|
@@ -27,18 +26,25 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
27
26
|
generation_kwargs: Optional[Dict] = None,
|
|
28
27
|
**client_kwargs,
|
|
29
28
|
):
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
settings = get_settings()
|
|
30
|
+
|
|
31
|
+
if openai_api_key is not None:
|
|
32
|
+
# keep it secret, keep it safe from serializings, logging and alike
|
|
33
|
+
self.openai_api_key: SecretStr | None = SecretStr(openai_api_key)
|
|
34
|
+
else:
|
|
35
|
+
self.openai_api_key = settings.AZURE_OPENAI_API_KEY
|
|
36
|
+
|
|
33
37
|
self.openai_api_version = (
|
|
34
|
-
openai_api_version
|
|
35
|
-
or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
|
|
38
|
+
openai_api_version or settings.OPENAI_API_VERSION
|
|
36
39
|
)
|
|
37
|
-
self.azure_endpoint =
|
|
38
|
-
|
|
40
|
+
self.azure_endpoint = (
|
|
41
|
+
azure_endpoint
|
|
42
|
+
or settings.AZURE_OPENAI_ENDPOINT
|
|
43
|
+
and str(settings.AZURE_OPENAI_ENDPOINT)
|
|
39
44
|
)
|
|
40
|
-
|
|
41
|
-
|
|
45
|
+
|
|
46
|
+
self.azure_deployment = (
|
|
47
|
+
azure_deployment or settings.AZURE_EMBEDDING_DEPLOYMENT_NAME
|
|
42
48
|
)
|
|
43
49
|
self.client_kwargs = client_kwargs or {}
|
|
44
50
|
self.model_name = model or self.azure_deployment
|
|
@@ -86,12 +92,19 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
86
92
|
return self._build_client(AsyncAzureOpenAI)
|
|
87
93
|
|
|
88
94
|
def _build_client(self, cls):
|
|
95
|
+
api_key = require_secret_api_key(
|
|
96
|
+
self.openai_api_key,
|
|
97
|
+
provider_label="AzureOpenAI",
|
|
98
|
+
env_var_name="AZURE_OPENAI_API_KEY",
|
|
99
|
+
param_hint="`openai_api_key` to AzureOpenAIEmbeddingModel(...)",
|
|
100
|
+
)
|
|
101
|
+
|
|
89
102
|
client_kwargs = self.client_kwargs.copy()
|
|
90
103
|
if not sdk_retries_for(PS.AZURE):
|
|
91
104
|
client_kwargs["max_retries"] = 0
|
|
92
105
|
|
|
93
106
|
client_init_kwargs = dict(
|
|
94
|
-
api_key=
|
|
107
|
+
api_key=api_key,
|
|
95
108
|
api_version=self.openai_api_version,
|
|
96
109
|
azure_endpoint=self.azure_endpoint,
|
|
97
110
|
azure_deployment=self.azure_deployment,
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from openai import OpenAI, AsyncOpenAI
|
|
2
2
|
from typing import Dict, List, Optional
|
|
3
|
+
from pydantic import SecretStr
|
|
3
4
|
|
|
4
|
-
from deepeval.
|
|
5
|
+
from deepeval.config.settings import get_settings
|
|
6
|
+
from deepeval.models.utils import require_secret_api_key
|
|
5
7
|
from deepeval.models import DeepEvalBaseEmbeddingModel
|
|
6
8
|
from deepeval.models.retry_policy import (
|
|
7
9
|
create_retry_decorator,
|
|
@@ -23,15 +25,19 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
23
25
|
generation_kwargs: Optional[Dict] = None,
|
|
24
26
|
**client_kwargs,
|
|
25
27
|
):
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
28
|
+
settings = get_settings()
|
|
29
|
+
if api_key is not None:
|
|
30
|
+
# keep it secret, keep it safe from serializings, logging and alike
|
|
31
|
+
self.api_key: SecretStr | None = SecretStr(api_key)
|
|
32
|
+
else:
|
|
33
|
+
self.api_key = get_settings().LOCAL_EMBEDDING_API_KEY
|
|
34
|
+
|
|
35
|
+
self.base_url = (
|
|
36
|
+
base_url
|
|
37
|
+
or settings.LOCAL_EMBEDDING_BASE_URL
|
|
38
|
+
and str(settings.LOCAL_EMBEDDING_BASE_URL)
|
|
34
39
|
)
|
|
40
|
+
self.model_name = model or settings.LOCAL_EMBEDDING_MODEL_NAME
|
|
35
41
|
self.client_kwargs = client_kwargs or {}
|
|
36
42
|
self.generation_kwargs = generation_kwargs or {}
|
|
37
43
|
super().__init__(self.model_name)
|
|
@@ -81,12 +87,19 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
81
87
|
return self._build_client(AsyncOpenAI)
|
|
82
88
|
|
|
83
89
|
def _build_client(self, cls):
|
|
90
|
+
api_key = require_secret_api_key(
|
|
91
|
+
self.api_key,
|
|
92
|
+
provider_label="OpenAI",
|
|
93
|
+
env_var_name="LOCAL_EMBEDDING_API_KEY",
|
|
94
|
+
param_hint="`api_key` to LocalEmbeddingModel(...)",
|
|
95
|
+
)
|
|
96
|
+
|
|
84
97
|
client_kwargs = self.client_kwargs.copy()
|
|
85
98
|
if not sdk_retries_for(PS.LOCAL):
|
|
86
99
|
client_kwargs["max_retries"] = 0
|
|
87
100
|
|
|
88
101
|
client_init_kwargs = dict(
|
|
89
|
-
api_key=
|
|
102
|
+
api_key=api_key,
|
|
90
103
|
base_url=self.base_url,
|
|
91
104
|
**client_kwargs,
|
|
92
105
|
)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from ollama import Client, AsyncClient
|
|
2
2
|
from typing import List, Optional, Dict
|
|
3
3
|
|
|
4
|
-
from deepeval.
|
|
4
|
+
from deepeval.config.settings import get_settings
|
|
5
5
|
from deepeval.models import DeepEvalBaseEmbeddingModel
|
|
6
6
|
from deepeval.models.retry_policy import (
|
|
7
7
|
create_retry_decorator,
|
|
@@ -20,12 +20,14 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
20
20
|
generation_kwargs: Optional[Dict] = None,
|
|
21
21
|
**client_kwargs,
|
|
22
22
|
):
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
23
|
+
settings = get_settings()
|
|
24
|
+
|
|
25
|
+
self.host = (
|
|
26
|
+
host
|
|
27
|
+
or settings.LOCAL_EMBEDDING_BASE_URL
|
|
28
|
+
and str(settings.LOCAL_EMBEDDING_BASE_URL)
|
|
28
29
|
)
|
|
30
|
+
self.model_name = model or settings.LOCAL_EMBEDDING_MODEL_NAME
|
|
29
31
|
self.client_kwargs = client_kwargs or {}
|
|
30
32
|
self.generation_kwargs = generation_kwargs or {}
|
|
31
33
|
super().__init__(self.model_name)
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
from typing import Dict, Optional, List
|
|
2
2
|
from openai import OpenAI, AsyncOpenAI
|
|
3
|
+
from pydantic import SecretStr
|
|
4
|
+
|
|
5
|
+
from deepeval.config.settings import get_settings
|
|
6
|
+
from deepeval.models.utils import require_secret_api_key
|
|
3
7
|
from deepeval.models import DeepEvalBaseEmbeddingModel
|
|
4
8
|
from deepeval.models.retry_policy import (
|
|
5
9
|
create_retry_decorator,
|
|
@@ -27,7 +31,12 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
27
31
|
generation_kwargs: Optional[Dict] = None,
|
|
28
32
|
**client_kwargs,
|
|
29
33
|
):
|
|
30
|
-
|
|
34
|
+
if openai_api_key is not None:
|
|
35
|
+
# keep it secret, keep it safe from serializings, logging and alike
|
|
36
|
+
self.openai_api_key: SecretStr | None = SecretStr(openai_api_key)
|
|
37
|
+
else:
|
|
38
|
+
self.openai_api_key = get_settings().OPENAI_API_KEY
|
|
39
|
+
|
|
31
40
|
self.model_name = model if model else default_openai_embedding_model
|
|
32
41
|
if self.model_name not in valid_openai_embedding_models:
|
|
33
42
|
raise ValueError(
|
|
@@ -81,12 +90,19 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
81
90
|
return self._build_client(AsyncOpenAI)
|
|
82
91
|
|
|
83
92
|
def _build_client(self, cls):
|
|
93
|
+
openai_api_key = require_secret_api_key(
|
|
94
|
+
self.openai_api_key,
|
|
95
|
+
provider_label="OpenAI",
|
|
96
|
+
env_var_name="OPENAI_API_KEY",
|
|
97
|
+
param_hint="`openai_api_key` to OpenAIEmbeddingModel(...)",
|
|
98
|
+
)
|
|
99
|
+
|
|
84
100
|
client_kwargs = self.client_kwargs.copy()
|
|
85
101
|
if not sdk_retries_for(PS.OPENAI):
|
|
86
102
|
client_kwargs["max_retries"] = 0
|
|
87
103
|
|
|
88
104
|
client_init_kwargs = dict(
|
|
89
|
-
api_key=
|
|
105
|
+
api_key=openai_api_key,
|
|
90
106
|
**client_kwargs,
|
|
91
107
|
)
|
|
92
108
|
try:
|
|
@@ -2,7 +2,7 @@ import warnings
|
|
|
2
2
|
|
|
3
3
|
from typing import Optional, Tuple, Union, Dict
|
|
4
4
|
from anthropic import Anthropic, AsyncAnthropic
|
|
5
|
-
from pydantic import BaseModel
|
|
5
|
+
from pydantic import BaseModel, SecretStr
|
|
6
6
|
|
|
7
7
|
from deepeval.models import DeepEvalBaseLLM
|
|
8
8
|
from deepeval.models.llms.utils import trim_and_load_json
|
|
@@ -10,7 +10,7 @@ from deepeval.models.retry_policy import (
|
|
|
10
10
|
create_retry_decorator,
|
|
11
11
|
sdk_retries_for,
|
|
12
12
|
)
|
|
13
|
-
from deepeval.models.utils import parse_model_name
|
|
13
|
+
from deepeval.models.utils import parse_model_name, require_secret_api_key
|
|
14
14
|
from deepeval.config.settings import get_settings
|
|
15
15
|
from deepeval.constants import ProviderSlug as PS
|
|
16
16
|
|
|
@@ -41,7 +41,14 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
41
41
|
**kwargs,
|
|
42
42
|
):
|
|
43
43
|
model_name = parse_model_name(model)
|
|
44
|
-
|
|
44
|
+
|
|
45
|
+
if _anthropic_api_key is not None:
|
|
46
|
+
# keep it secret, keep it safe from serializings, logging and alike
|
|
47
|
+
self._anthropic_api_key: SecretStr | None = SecretStr(
|
|
48
|
+
_anthropic_api_key
|
|
49
|
+
)
|
|
50
|
+
else:
|
|
51
|
+
self._anthropic_api_key = get_settings().ANTHROPIC_API_KEY
|
|
45
52
|
|
|
46
53
|
if temperature < 0:
|
|
47
54
|
raise ValueError("Temperature must be >= 0.")
|
|
@@ -155,9 +162,14 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
155
162
|
return kwargs
|
|
156
163
|
|
|
157
164
|
def _build_client(self, cls):
|
|
158
|
-
|
|
165
|
+
api_key = require_secret_api_key(
|
|
166
|
+
self._anthropic_api_key,
|
|
167
|
+
provider_label="Anthropic",
|
|
168
|
+
env_var_name="ANTHROPIC_API_KEY",
|
|
169
|
+
param_hint="`_anthropic_api_key` to AnthropicModel(...)",
|
|
170
|
+
)
|
|
159
171
|
kw = dict(
|
|
160
|
-
api_key=
|
|
172
|
+
api_key=api_key,
|
|
161
173
|
**self._client_kwargs(),
|
|
162
174
|
)
|
|
163
175
|
try:
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from openai.types.chat.chat_completion import ChatCompletion
|
|
2
2
|
from openai import AzureOpenAI, AsyncAzureOpenAI
|
|
3
3
|
from typing import Optional, Tuple, Union, Dict
|
|
4
|
-
from pydantic import BaseModel
|
|
4
|
+
from pydantic import BaseModel, SecretStr
|
|
5
5
|
|
|
6
|
+
from deepeval.config.settings import get_settings
|
|
6
7
|
from deepeval.models import DeepEvalBaseLLM
|
|
7
|
-
from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
|
|
8
8
|
from deepeval.models.llms.openai_model import (
|
|
9
9
|
structured_outputs_models,
|
|
10
10
|
json_mode_models,
|
|
@@ -16,7 +16,7 @@ from deepeval.models.retry_policy import (
|
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
from deepeval.models.llms.utils import trim_and_load_json
|
|
19
|
-
from deepeval.models.utils import parse_model_name
|
|
19
|
+
from deepeval.models.utils import parse_model_name, require_secret_api_key
|
|
20
20
|
from deepeval.constants import ProviderSlug as PS
|
|
21
21
|
|
|
22
22
|
|
|
@@ -35,24 +35,29 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
35
35
|
generation_kwargs: Optional[Dict] = None,
|
|
36
36
|
**kwargs,
|
|
37
37
|
):
|
|
38
|
+
settings = get_settings()
|
|
39
|
+
|
|
38
40
|
# fetch Azure deployment parameters
|
|
39
|
-
model_name = model_name or
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
41
|
+
model_name = model_name or settings.AZURE_MODEL_NAME
|
|
42
|
+
self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
|
|
43
|
+
|
|
44
|
+
if azure_openai_api_key is not None:
|
|
45
|
+
# keep it secret, keep it safe from serializings, logging and alike
|
|
46
|
+
self.azure_openai_api_key: SecretStr | None = SecretStr(
|
|
47
|
+
azure_openai_api_key
|
|
48
|
+
)
|
|
49
|
+
else:
|
|
50
|
+
self.azure_openai_api_key = settings.AZURE_OPENAI_API_KEY
|
|
51
|
+
|
|
49
52
|
self.openai_api_version = (
|
|
50
|
-
openai_api_version
|
|
51
|
-
or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
|
|
53
|
+
openai_api_version or settings.OPENAI_API_VERSION
|
|
52
54
|
)
|
|
53
|
-
self.azure_endpoint =
|
|
54
|
-
|
|
55
|
+
self.azure_endpoint = (
|
|
56
|
+
azure_endpoint
|
|
57
|
+
or settings.AZURE_OPENAI_ENDPOINT
|
|
58
|
+
and str(settings.AZURE_OPENAI_ENDPOINT)
|
|
55
59
|
)
|
|
60
|
+
|
|
56
61
|
if temperature < 0:
|
|
57
62
|
raise ValueError("Temperature must be >= 0.")
|
|
58
63
|
self.temperature = temperature
|
|
@@ -270,8 +275,15 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
|
|
|
270
275
|
return kwargs
|
|
271
276
|
|
|
272
277
|
def _build_client(self, cls):
|
|
278
|
+
api_key = require_secret_api_key(
|
|
279
|
+
self.azure_openai_api_key,
|
|
280
|
+
provider_label="AzureOpenAI",
|
|
281
|
+
env_var_name="AZURE_OPENAI_API_KEY",
|
|
282
|
+
param_hint="`azure_openai_api_key` to AzureOpenAIModel(...)",
|
|
283
|
+
)
|
|
284
|
+
|
|
273
285
|
kw = dict(
|
|
274
|
-
api_key=
|
|
286
|
+
api_key=api_key,
|
|
275
287
|
api_version=self.openai_api_version,
|
|
276
288
|
azure_endpoint=self.azure_endpoint,
|
|
277
289
|
azure_deployment=self.deployment_name,
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from typing import Optional, Tuple, Union, Dict
|
|
2
2
|
from openai import OpenAI, AsyncOpenAI
|
|
3
|
-
from pydantic import BaseModel
|
|
3
|
+
from pydantic import BaseModel, SecretStr
|
|
4
4
|
|
|
5
|
-
from deepeval.
|
|
5
|
+
from deepeval.config.settings import get_settings
|
|
6
6
|
from deepeval.models.llms.utils import trim_and_load_json
|
|
7
|
+
from deepeval.models.utils import require_secret_api_key
|
|
7
8
|
from deepeval.models import DeepEvalBaseLLM
|
|
8
9
|
from deepeval.models.retry_policy import (
|
|
9
10
|
create_retry_decorator,
|
|
@@ -36,25 +37,27 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
36
37
|
generation_kwargs: Optional[Dict] = None,
|
|
37
38
|
**kwargs,
|
|
38
39
|
):
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
settings = get_settings()
|
|
41
|
+
|
|
42
|
+
model_name = model or settings.DEEPSEEK_MODEL_NAME
|
|
42
43
|
if model_name not in model_pricing:
|
|
43
44
|
raise ValueError(
|
|
44
45
|
f"Invalid model. Available DeepSeek models: {', '.join(model_pricing.keys())}"
|
|
45
46
|
)
|
|
46
|
-
temperature_from_key =
|
|
47
|
-
ModelKeyValues.TEMPERATURE
|
|
48
|
-
)
|
|
47
|
+
temperature_from_key = settings.TEMPERATURE
|
|
49
48
|
if temperature_from_key is None:
|
|
50
49
|
self.temperature = temperature
|
|
51
50
|
else:
|
|
52
51
|
self.temperature = float(temperature_from_key)
|
|
53
52
|
if self.temperature < 0:
|
|
54
53
|
raise ValueError("Temperature must be >= 0.")
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
|
|
55
|
+
if api_key is not None:
|
|
56
|
+
# keep it secret, keep it safe from serializings, logging and alike
|
|
57
|
+
self.api_key: SecretStr | None = SecretStr(api_key)
|
|
58
|
+
else:
|
|
59
|
+
self.api_key = settings.DEEPSEEK_API_KEY
|
|
60
|
+
|
|
58
61
|
self.base_url = "https://api.deepseek.com"
|
|
59
62
|
self.kwargs = kwargs
|
|
60
63
|
self.generation_kwargs = generation_kwargs or {}
|
|
@@ -167,8 +170,15 @@ class DeepSeekModel(DeepEvalBaseLLM):
|
|
|
167
170
|
return kwargs
|
|
168
171
|
|
|
169
172
|
def _build_client(self, cls):
|
|
173
|
+
api_key = require_secret_api_key(
|
|
174
|
+
self.api_key,
|
|
175
|
+
provider_label="DeepSeek",
|
|
176
|
+
env_var_name="DEEPSEEK_API_KEY",
|
|
177
|
+
param_hint="`api_key` to DeepSeekModel(...)",
|
|
178
|
+
)
|
|
179
|
+
|
|
170
180
|
kw = dict(
|
|
171
|
-
api_key=
|
|
181
|
+
api_key=api_key,
|
|
172
182
|
base_url=self.base_url,
|
|
173
183
|
**self._client_kwargs(),
|
|
174
184
|
)
|