deepeval 3.7.2__py3-none-any.whl → 3.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. deepeval/_version.py +1 -1
  2. deepeval/benchmarks/human_eval/human_eval.py +2 -1
  3. deepeval/cli/test.py +1 -1
  4. deepeval/config/settings.py +102 -13
  5. deepeval/dataset/dataset.py +35 -11
  6. deepeval/dataset/utils.py +2 -0
  7. deepeval/evaluate/configs.py +1 -1
  8. deepeval/evaluate/execute.py +4 -1
  9. deepeval/metrics/answer_relevancy/template.py +4 -4
  10. deepeval/metrics/argument_correctness/template.py +2 -2
  11. deepeval/metrics/bias/template.py +3 -3
  12. deepeval/metrics/contextual_precision/template.py +6 -6
  13. deepeval/metrics/contextual_recall/template.py +2 -2
  14. deepeval/metrics/contextual_relevancy/template.py +3 -3
  15. deepeval/metrics/conversation_completeness/template.py +2 -2
  16. deepeval/metrics/conversational_dag/templates.py +4 -4
  17. deepeval/metrics/conversational_g_eval/template.py +4 -3
  18. deepeval/metrics/dag/templates.py +4 -4
  19. deepeval/metrics/faithfulness/template.py +4 -4
  20. deepeval/metrics/hallucination/template.py +4 -4
  21. deepeval/metrics/misuse/template.py +2 -2
  22. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +7 -7
  23. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +6 -6
  24. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +2 -2
  25. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +3 -3
  26. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +9 -9
  27. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +4 -4
  28. deepeval/metrics/non_advice/template.py +2 -2
  29. deepeval/metrics/pii_leakage/template.py +2 -2
  30. deepeval/metrics/prompt_alignment/template.py +4 -4
  31. deepeval/metrics/role_violation/template.py +2 -2
  32. deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  33. deepeval/metrics/toxicity/template.py +4 -4
  34. deepeval/metrics/turn_relevancy/template.py +2 -2
  35. deepeval/metrics/utils.py +3 -0
  36. deepeval/models/__init__.py +2 -0
  37. deepeval/models/embedding_models/azure_embedding_model.py +28 -15
  38. deepeval/models/embedding_models/local_embedding_model.py +23 -10
  39. deepeval/models/embedding_models/ollama_embedding_model.py +8 -6
  40. deepeval/models/embedding_models/openai_embedding_model.py +18 -2
  41. deepeval/models/llms/anthropic_model.py +17 -5
  42. deepeval/models/llms/azure_model.py +30 -18
  43. deepeval/models/llms/deepseek_model.py +22 -12
  44. deepeval/models/llms/gemini_model.py +120 -87
  45. deepeval/models/llms/grok_model.py +23 -16
  46. deepeval/models/llms/kimi_model.py +23 -12
  47. deepeval/models/llms/litellm_model.py +63 -25
  48. deepeval/models/llms/local_model.py +26 -18
  49. deepeval/models/llms/ollama_model.py +17 -7
  50. deepeval/models/llms/openai_model.py +22 -17
  51. deepeval/models/llms/portkey_model.py +132 -0
  52. deepeval/models/mlllms/__init__.py +1 -0
  53. deepeval/models/mlllms/azure_model.py +343 -0
  54. deepeval/models/mlllms/gemini_model.py +102 -73
  55. deepeval/models/mlllms/ollama_model.py +40 -9
  56. deepeval/models/mlllms/openai_model.py +65 -14
  57. deepeval/models/utils.py +48 -3
  58. deepeval/optimization/__init__.py +13 -0
  59. deepeval/optimization/adapters/__init__.py +2 -0
  60. deepeval/optimization/adapters/deepeval_scoring_adapter.py +588 -0
  61. deepeval/optimization/aggregates.py +14 -0
  62. deepeval/optimization/configs.py +34 -0
  63. deepeval/optimization/copro/configs.py +31 -0
  64. deepeval/optimization/copro/loop.py +837 -0
  65. deepeval/optimization/gepa/__init__.py +7 -0
  66. deepeval/optimization/gepa/configs.py +115 -0
  67. deepeval/optimization/gepa/loop.py +677 -0
  68. deepeval/optimization/miprov2/configs.py +134 -0
  69. deepeval/optimization/miprov2/loop.py +785 -0
  70. deepeval/optimization/mutations/__init__.py +0 -0
  71. deepeval/optimization/mutations/prompt_rewriter.py +458 -0
  72. deepeval/optimization/policies/__init__.py +16 -0
  73. deepeval/optimization/policies/selection.py +166 -0
  74. deepeval/optimization/policies/tie_breaker.py +67 -0
  75. deepeval/optimization/prompt_optimizer.py +462 -0
  76. deepeval/optimization/simba/__init__.py +0 -0
  77. deepeval/optimization/simba/configs.py +33 -0
  78. deepeval/optimization/simba/loop.py +983 -0
  79. deepeval/optimization/simba/types.py +15 -0
  80. deepeval/optimization/types.py +361 -0
  81. deepeval/optimization/utils.py +598 -0
  82. deepeval/prompt/prompt.py +10 -5
  83. deepeval/test_run/cache.py +2 -0
  84. deepeval/test_run/test_run.py +6 -1
  85. deepeval/tracing/context.py +3 -0
  86. deepeval/tracing/tracing.py +22 -11
  87. deepeval/utils.py +24 -0
  88. {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/METADATA +1 -1
  89. {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/RECORD +92 -66
  90. {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/entry_points.txt +1 -1
  91. {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/LICENSE.md +0 -0
  92. {deepeval-3.7.2.dist-info → deepeval-3.7.4.dist-info}/WHEEL +0 -0
@@ -74,13 +74,13 @@ class MultimodalContextualRelevancyTemplate:
74
74
  {{
75
75
  "verdicts": [
76
76
  {{
77
- "verdict": "yes",
78
77
  "statement": "Einstein won the Nobel Prize for his discovery of the photoelectric effect in 1968",
78
+ "verdict": "yes"
79
79
  }},
80
80
  {{
81
- "verdict": "no",
82
81
  "statement": "There was a cat.",
83
- "reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements."
82
+ "reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements.",
83
+ "verdict": "no"
84
84
  }}
85
85
  ]
86
86
  }}
@@ -107,24 +107,24 @@ class MultimodalFaithfulnessTemplate:
107
107
  {{
108
108
  "verdicts": [
109
109
  {{
110
- "verdict": "idk",
111
- "reason": "The claim about Barack Obama is not directly addressed in the retrieval context, and so poses no contradiction."
110
+ "reason": "The claim about Barack Obama is not directly addressed in the retrieval context, and so poses no contradiction.",
111
+ "verdict": "idk"
112
112
  }},
113
113
  {{
114
- "verdict": "idk",
115
- "reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context."
114
+ "reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context.",
115
+ "verdict": "idk"
116
116
  }},
117
117
  {{
118
118
  "verdict": "yes"
119
119
  }},
120
120
  {{
121
- "verdict": "no",
122
- "reason": "The actual output claims Einstein won the Nobel Prize in 1969, which is untrue as the retrieval context states it is 1968 instead."
121
+ "reason": "The actual output claims Einstein won the Nobel Prize in 1969, which is untrue as the retrieval context states it is 1968 instead.",
122
+ "verdict": "no"
123
123
  }},
124
124
  {{
125
- "verdict": "no",
126
- "reason": "The actual output claims Einstein is a German chef, which is not correct as the retrieval context states he was a German scientist instead."
127
- }},
125
+ "reason": "The actual output claims Einstein is a German chef, which is not correct as the retrieval context states he was a German scientist instead.",
126
+ "verdict": "no"
127
+ }}
128
128
  ]
129
129
  }}
130
130
  ===== END OF EXAMPLE ======
@@ -92,8 +92,8 @@ class MultimodalGEvalTemplate:
92
92
  ---
93
93
  **Example JSON:**
94
94
  {{
95
- "score": {score_range[0]},
96
- "reason": "your concise and informative reason here"
95
+ "reason": "your concise and informative reason here",
96
+ "score": {score_range[0]}
97
97
  }}
98
98
 
99
99
  JSON:
@@ -136,8 +136,8 @@ class MultimodalGEvalTemplate:
136
136
 
137
137
  Example JSON:
138
138
  {{
139
- "score": 0,
140
- "reason": "The text does not follow the evaluation steps provided."
139
+ "reason": "The text does not follow the evaluation steps provided.",
140
+ "score": 0
141
141
  }}
142
142
  **
143
143
 
@@ -42,8 +42,8 @@ Example JSON:
42
42
  {{
43
43
  "verdicts": [
44
44
  {{
45
- "verdict": "yes",
46
- "reason": "Contains specific investment advice"
45
+ "reason": "Contains specific investment advice",
46
+ "verdict": "yes"
47
47
  }}
48
48
  ]
49
49
  }}
@@ -39,8 +39,8 @@ Example JSON:
39
39
  {{
40
40
  "verdicts": [
41
41
  {{
42
- "verdict": "yes",
43
- "reason": "Contains personal phone number"
42
+ "reason": "Contains personal phone number",
43
+ "verdict": "yes"
44
44
  }}
45
45
  ]
46
46
  }}
@@ -26,12 +26,12 @@ Example JSON:
26
26
  "verdict": "yes"
27
27
  }},
28
28
  {{
29
- "verdict": "no",
30
- "reason": "The LLM corrected the user when the user used the wrong grammar in asking about the number of stars in the sky."
29
+ "reason": "The LLM corrected the user when the user used the wrong grammar in asking about the number of stars in the sky.",
30
+ "verdict": "no"
31
31
  }},
32
32
  {{
33
- "verdict": "no",
34
- "reason": "The LLM only made 'HEY THERE' uppercase, which does not follow the instruction of making everything uppercase completely."
33
+ "reason": "The LLM only made 'HEY THERE' uppercase, which does not follow the instruction of making everything uppercase completely.",
34
+ "verdict": "no"
35
35
  }}
36
36
  ]
37
37
  }}
@@ -39,8 +39,8 @@ Example JSON:
39
39
  {{
40
40
  "verdicts": [
41
41
  {{
42
- "verdict": "yes",
43
- "reason": "AI is pretending to be human"
42
+ "reason": "AI is pretending to be human",
43
+ "verdict": "yes"
44
44
  }}
45
45
  ]
46
46
  }}
@@ -231,4 +231,4 @@ class StepEfficiencyMetric(BaseMetric):
231
231
 
232
232
  @property
233
233
  def __name__(self):
234
- return "Execution Efficiency"
234
+ return "Step Efficiency"
@@ -71,15 +71,15 @@ Example JSON:
71
71
  {{
72
72
  "verdicts": [
73
73
  {{
74
- "verdict": "yes",
75
- "reason": "The actual output uses words like "garbage," "tasteless weeds," etc., ridicules vegan food and those who choose it, and attacks people who enjoy vegan food, calling them "pretentious losers. Furthermore, it dismisses the entire concept of vegan cuisine with a sweeping generalization, making it seem worthless."
74
+ "reason": "The actual output uses words like "garbage," "tasteless weeds," etc., ridicules vegan food and those who choose it, and attacks people who enjoy vegan food, calling them "pretentious losers. Furthermore, it dismisses the entire concept of vegan cuisine with a sweeping generalization, making it seem worthless.",
75
+ "verdict": "yes"
76
76
  }},
77
77
  {{
78
78
  "verdict": "no"
79
79
  }},
80
80
  {{
81
- "verdict": "yes":
82
- "reason": "'Sarah always meant well' sounds positive but are undermined by the surrounding criticism such as 'can't help but sign', which can be considered a personal attack."
81
+ "reason": "'Sarah always meant well' sounds positive but are undermined by the surrounding criticism such as 'can't help but sign', which can be considered a personal attack.",
82
+ "verdict": "yes"
83
83
  }}
84
84
  ]
85
85
  }}
@@ -33,8 +33,8 @@ Example Messages:
33
33
 
34
34
  Example JSON:
35
35
  {{
36
- "verdict": "no",
37
- "reason": "The LLM responded 'isn't it a nice day today' to a message that asked about how to treat a sore throat, which is completely irrelevant."
36
+ "reason": "The LLM responded 'isn't it a nice day today' to a message that asked about how to treat a sore throat, which is completely irrelevant.",
37
+ "verdict": "no"
38
38
  }}
39
39
  ===== END OF EXAMPLE ======
40
40
  You MUST ONLY provide a verdict for the LAST message on the list but MUST USE context from the previous messages.
deepeval/metrics/utils.py CHANGED
@@ -25,6 +25,7 @@ from deepeval.models import (
25
25
  MultimodalOpenAIModel,
26
26
  MultimodalGeminiModel,
27
27
  MultimodalOllamaModel,
28
+ MultimodalAzureOpenAIMLLMModel,
28
29
  AmazonBedrockModel,
29
30
  LiteLLMModel,
30
31
  KimiModel,
@@ -514,6 +515,8 @@ def initialize_multimodal_model(
514
515
  return MultimodalGeminiModel(), True
515
516
  if should_use_ollama_model():
516
517
  return MultimodalOllamaModel(), True
518
+ elif should_use_azure_openai():
519
+ return MultimodalAzureOpenAIMLLMModel(model_name=model), True
517
520
  elif isinstance(model, str) or model is None:
518
521
  return MultimodalOpenAIModel(model=model), True
519
522
  raise TypeError(
@@ -21,6 +21,7 @@ from deepeval.models.mlllms import (
21
21
  MultimodalOpenAIModel,
22
22
  MultimodalOllamaModel,
23
23
  MultimodalGeminiModel,
24
+ MultimodalAzureOpenAIMLLMModel,
24
25
  )
25
26
  from deepeval.models.embedding_models import (
26
27
  OpenAIEmbeddingModel,
@@ -48,6 +49,7 @@ __all__ = [
48
49
  "MultimodalOpenAIModel",
49
50
  "MultimodalOllamaModel",
50
51
  "MultimodalGeminiModel",
52
+ "MultimodalAzureOpenAIMLLMModel",
51
53
  "OpenAIEmbeddingModel",
52
54
  "AzureOpenAIEmbeddingModel",
53
55
  "LocalEmbeddingModel",
@@ -1,16 +1,15 @@
1
1
  from typing import Dict, List, Optional
2
2
  from openai import AzureOpenAI, AsyncAzureOpenAI
3
- from deepeval.key_handler import (
4
- EmbeddingKeyValues,
5
- ModelKeyValues,
6
- KEY_FILE_HANDLER,
7
- )
3
+ from pydantic import SecretStr
4
+
5
+ from deepeval.config.settings import get_settings
8
6
  from deepeval.models import DeepEvalBaseEmbeddingModel
9
7
  from deepeval.models.retry_policy import (
10
8
  create_retry_decorator,
11
9
  sdk_retries_for,
12
10
  )
13
11
  from deepeval.constants import ProviderSlug as PS
12
+ from deepeval.models.utils import require_secret_api_key
14
13
 
15
14
 
16
15
  retry_azure = create_retry_decorator(PS.AZURE)
@@ -27,18 +26,25 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
27
26
  generation_kwargs: Optional[Dict] = None,
28
27
  **client_kwargs,
29
28
  ):
30
- self.openai_api_key = openai_api_key or KEY_FILE_HANDLER.fetch_data(
31
- ModelKeyValues.AZURE_OPENAI_API_KEY
32
- )
29
+ settings = get_settings()
30
+
31
+ if openai_api_key is not None:
32
+ # keep it secret, keep it safe from serializings, logging and alike
33
+ self.openai_api_key: SecretStr | None = SecretStr(openai_api_key)
34
+ else:
35
+ self.openai_api_key = settings.AZURE_OPENAI_API_KEY
36
+
33
37
  self.openai_api_version = (
34
- openai_api_version
35
- or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
38
+ openai_api_version or settings.OPENAI_API_VERSION
36
39
  )
37
- self.azure_endpoint = azure_endpoint or KEY_FILE_HANDLER.fetch_data(
38
- ModelKeyValues.AZURE_OPENAI_ENDPOINT
40
+ self.azure_endpoint = (
41
+ azure_endpoint
42
+ or settings.AZURE_OPENAI_ENDPOINT
43
+ and str(settings.AZURE_OPENAI_ENDPOINT)
39
44
  )
40
- self.azure_deployment = azure_deployment or KEY_FILE_HANDLER.fetch_data(
41
- EmbeddingKeyValues.AZURE_EMBEDDING_DEPLOYMENT_NAME
45
+
46
+ self.azure_deployment = (
47
+ azure_deployment or settings.AZURE_EMBEDDING_DEPLOYMENT_NAME
42
48
  )
43
49
  self.client_kwargs = client_kwargs or {}
44
50
  self.model_name = model or self.azure_deployment
@@ -86,12 +92,19 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
86
92
  return self._build_client(AsyncAzureOpenAI)
87
93
 
88
94
  def _build_client(self, cls):
95
+ api_key = require_secret_api_key(
96
+ self.openai_api_key,
97
+ provider_label="AzureOpenAI",
98
+ env_var_name="AZURE_OPENAI_API_KEY",
99
+ param_hint="`openai_api_key` to AzureOpenAIEmbeddingModel(...)",
100
+ )
101
+
89
102
  client_kwargs = self.client_kwargs.copy()
90
103
  if not sdk_retries_for(PS.AZURE):
91
104
  client_kwargs["max_retries"] = 0
92
105
 
93
106
  client_init_kwargs = dict(
94
- api_key=self.openai_api_key,
107
+ api_key=api_key,
95
108
  api_version=self.openai_api_version,
96
109
  azure_endpoint=self.azure_endpoint,
97
110
  azure_deployment=self.azure_deployment,
@@ -1,7 +1,9 @@
1
1
  from openai import OpenAI, AsyncOpenAI
2
2
  from typing import Dict, List, Optional
3
+ from pydantic import SecretStr
3
4
 
4
- from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
5
+ from deepeval.config.settings import get_settings
6
+ from deepeval.models.utils import require_secret_api_key
5
7
  from deepeval.models import DeepEvalBaseEmbeddingModel
6
8
  from deepeval.models.retry_policy import (
7
9
  create_retry_decorator,
@@ -23,15 +25,19 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
23
25
  generation_kwargs: Optional[Dict] = None,
24
26
  **client_kwargs,
25
27
  ):
26
- self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
27
- EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
28
- )
29
- self.base_url = base_url or KEY_FILE_HANDLER.fetch_data(
30
- EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
31
- )
32
- self.model_name = model or KEY_FILE_HANDLER.fetch_data(
33
- EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
28
+ settings = get_settings()
29
+ if api_key is not None:
30
+ # keep it secret, keep it safe from serializings, logging and alike
31
+ self.api_key: SecretStr | None = SecretStr(api_key)
32
+ else:
33
+ self.api_key = get_settings().LOCAL_EMBEDDING_API_KEY
34
+
35
+ self.base_url = (
36
+ base_url
37
+ or settings.LOCAL_EMBEDDING_BASE_URL
38
+ and str(settings.LOCAL_EMBEDDING_BASE_URL)
34
39
  )
40
+ self.model_name = model or settings.LOCAL_EMBEDDING_MODEL_NAME
35
41
  self.client_kwargs = client_kwargs or {}
36
42
  self.generation_kwargs = generation_kwargs or {}
37
43
  super().__init__(self.model_name)
@@ -81,12 +87,19 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
81
87
  return self._build_client(AsyncOpenAI)
82
88
 
83
89
  def _build_client(self, cls):
90
+ api_key = require_secret_api_key(
91
+ self.api_key,
92
+ provider_label="OpenAI",
93
+ env_var_name="LOCAL_EMBEDDING_API_KEY",
94
+ param_hint="`api_key` to LocalEmbeddingModel(...)",
95
+ )
96
+
84
97
  client_kwargs = self.client_kwargs.copy()
85
98
  if not sdk_retries_for(PS.LOCAL):
86
99
  client_kwargs["max_retries"] = 0
87
100
 
88
101
  client_init_kwargs = dict(
89
- api_key=self.api_key,
102
+ api_key=api_key,
90
103
  base_url=self.base_url,
91
104
  **client_kwargs,
92
105
  )
@@ -1,7 +1,7 @@
1
1
  from ollama import Client, AsyncClient
2
2
  from typing import List, Optional, Dict
3
3
 
4
- from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
4
+ from deepeval.config.settings import get_settings
5
5
  from deepeval.models import DeepEvalBaseEmbeddingModel
6
6
  from deepeval.models.retry_policy import (
7
7
  create_retry_decorator,
@@ -20,12 +20,14 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
20
20
  generation_kwargs: Optional[Dict] = None,
21
21
  **client_kwargs,
22
22
  ):
23
- self.host = host or KEY_FILE_HANDLER.fetch_data(
24
- EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
25
- )
26
- self.model_name = model or KEY_FILE_HANDLER.fetch_data(
27
- EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
23
+ settings = get_settings()
24
+
25
+ self.host = (
26
+ host
27
+ or settings.LOCAL_EMBEDDING_BASE_URL
28
+ and str(settings.LOCAL_EMBEDDING_BASE_URL)
28
29
  )
30
+ self.model_name = model or settings.LOCAL_EMBEDDING_MODEL_NAME
29
31
  self.client_kwargs = client_kwargs or {}
30
32
  self.generation_kwargs = generation_kwargs or {}
31
33
  super().__init__(self.model_name)
@@ -1,5 +1,9 @@
1
1
  from typing import Dict, Optional, List
2
2
  from openai import OpenAI, AsyncOpenAI
3
+ from pydantic import SecretStr
4
+
5
+ from deepeval.config.settings import get_settings
6
+ from deepeval.models.utils import require_secret_api_key
3
7
  from deepeval.models import DeepEvalBaseEmbeddingModel
4
8
  from deepeval.models.retry_policy import (
5
9
  create_retry_decorator,
@@ -27,7 +31,12 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
27
31
  generation_kwargs: Optional[Dict] = None,
28
32
  **client_kwargs,
29
33
  ):
30
- self.openai_api_key = openai_api_key
34
+ if openai_api_key is not None:
35
+ # keep it secret, keep it safe from serializings, logging and alike
36
+ self.openai_api_key: SecretStr | None = SecretStr(openai_api_key)
37
+ else:
38
+ self.openai_api_key = get_settings().OPENAI_API_KEY
39
+
31
40
  self.model_name = model if model else default_openai_embedding_model
32
41
  if self.model_name not in valid_openai_embedding_models:
33
42
  raise ValueError(
@@ -81,12 +90,19 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
81
90
  return self._build_client(AsyncOpenAI)
82
91
 
83
92
  def _build_client(self, cls):
93
+ openai_api_key = require_secret_api_key(
94
+ self.openai_api_key,
95
+ provider_label="OpenAI",
96
+ env_var_name="OPENAI_API_KEY",
97
+ param_hint="`openai_api_key` to OpenAIEmbeddingModel(...)",
98
+ )
99
+
84
100
  client_kwargs = self.client_kwargs.copy()
85
101
  if not sdk_retries_for(PS.OPENAI):
86
102
  client_kwargs["max_retries"] = 0
87
103
 
88
104
  client_init_kwargs = dict(
89
- api_key=self.openai_api_key,
105
+ api_key=openai_api_key,
90
106
  **client_kwargs,
91
107
  )
92
108
  try:
@@ -2,7 +2,7 @@ import warnings
2
2
 
3
3
  from typing import Optional, Tuple, Union, Dict
4
4
  from anthropic import Anthropic, AsyncAnthropic
5
- from pydantic import BaseModel
5
+ from pydantic import BaseModel, SecretStr
6
6
 
7
7
  from deepeval.models import DeepEvalBaseLLM
8
8
  from deepeval.models.llms.utils import trim_and_load_json
@@ -10,7 +10,7 @@ from deepeval.models.retry_policy import (
10
10
  create_retry_decorator,
11
11
  sdk_retries_for,
12
12
  )
13
- from deepeval.models.utils import parse_model_name
13
+ from deepeval.models.utils import parse_model_name, require_secret_api_key
14
14
  from deepeval.config.settings import get_settings
15
15
  from deepeval.constants import ProviderSlug as PS
16
16
 
@@ -41,7 +41,14 @@ class AnthropicModel(DeepEvalBaseLLM):
41
41
  **kwargs,
42
42
  ):
43
43
  model_name = parse_model_name(model)
44
- self._anthropic_api_key = _anthropic_api_key
44
+
45
+ if _anthropic_api_key is not None:
46
+ # keep it secret, keep it safe from serializings, logging and alike
47
+ self._anthropic_api_key: SecretStr | None = SecretStr(
48
+ _anthropic_api_key
49
+ )
50
+ else:
51
+ self._anthropic_api_key = get_settings().ANTHROPIC_API_KEY
45
52
 
46
53
  if temperature < 0:
47
54
  raise ValueError("Temperature must be >= 0.")
@@ -155,9 +162,14 @@ class AnthropicModel(DeepEvalBaseLLM):
155
162
  return kwargs
156
163
 
157
164
  def _build_client(self, cls):
158
- settings = get_settings()
165
+ api_key = require_secret_api_key(
166
+ self._anthropic_api_key,
167
+ provider_label="Anthropic",
168
+ env_var_name="ANTHROPIC_API_KEY",
169
+ param_hint="`_anthropic_api_key` to AnthropicModel(...)",
170
+ )
159
171
  kw = dict(
160
- api_key=settings.ANTHROPIC_API_KEY or self._anthropic_api_key,
172
+ api_key=api_key,
161
173
  **self._client_kwargs(),
162
174
  )
163
175
  try:
@@ -1,10 +1,10 @@
1
1
  from openai.types.chat.chat_completion import ChatCompletion
2
2
  from openai import AzureOpenAI, AsyncAzureOpenAI
3
3
  from typing import Optional, Tuple, Union, Dict
4
- from pydantic import BaseModel
4
+ from pydantic import BaseModel, SecretStr
5
5
 
6
+ from deepeval.config.settings import get_settings
6
7
  from deepeval.models import DeepEvalBaseLLM
7
- from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
8
8
  from deepeval.models.llms.openai_model import (
9
9
  structured_outputs_models,
10
10
  json_mode_models,
@@ -16,7 +16,7 @@ from deepeval.models.retry_policy import (
16
16
  )
17
17
 
18
18
  from deepeval.models.llms.utils import trim_and_load_json
19
- from deepeval.models.utils import parse_model_name
19
+ from deepeval.models.utils import parse_model_name, require_secret_api_key
20
20
  from deepeval.constants import ProviderSlug as PS
21
21
 
22
22
 
@@ -35,24 +35,29 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
35
35
  generation_kwargs: Optional[Dict] = None,
36
36
  **kwargs,
37
37
  ):
38
+ settings = get_settings()
39
+
38
40
  # fetch Azure deployment parameters
39
- model_name = model_name or KEY_FILE_HANDLER.fetch_data(
40
- ModelKeyValues.AZURE_MODEL_NAME
41
- )
42
- self.deployment_name = deployment_name or KEY_FILE_HANDLER.fetch_data(
43
- ModelKeyValues.AZURE_DEPLOYMENT_NAME
44
- )
45
- self.azure_openai_api_key = (
46
- azure_openai_api_key
47
- or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.AZURE_OPENAI_API_KEY)
48
- )
41
+ model_name = model_name or settings.AZURE_MODEL_NAME
42
+ self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
43
+
44
+ if azure_openai_api_key is not None:
45
+ # keep it secret, keep it safe from serializings, logging and alike
46
+ self.azure_openai_api_key: SecretStr | None = SecretStr(
47
+ azure_openai_api_key
48
+ )
49
+ else:
50
+ self.azure_openai_api_key = settings.AZURE_OPENAI_API_KEY
51
+
49
52
  self.openai_api_version = (
50
- openai_api_version
51
- or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
53
+ openai_api_version or settings.OPENAI_API_VERSION
52
54
  )
53
- self.azure_endpoint = azure_endpoint or KEY_FILE_HANDLER.fetch_data(
54
- ModelKeyValues.AZURE_OPENAI_ENDPOINT
55
+ self.azure_endpoint = (
56
+ azure_endpoint
57
+ or settings.AZURE_OPENAI_ENDPOINT
58
+ and str(settings.AZURE_OPENAI_ENDPOINT)
55
59
  )
60
+
56
61
  if temperature < 0:
57
62
  raise ValueError("Temperature must be >= 0.")
58
63
  self.temperature = temperature
@@ -270,8 +275,15 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
270
275
  return kwargs
271
276
 
272
277
  def _build_client(self, cls):
278
+ api_key = require_secret_api_key(
279
+ self.azure_openai_api_key,
280
+ provider_label="AzureOpenAI",
281
+ env_var_name="AZURE_OPENAI_API_KEY",
282
+ param_hint="`azure_openai_api_key` to AzureOpenAIModel(...)",
283
+ )
284
+
273
285
  kw = dict(
274
- api_key=self.azure_openai_api_key,
286
+ api_key=api_key,
275
287
  api_version=self.openai_api_version,
276
288
  azure_endpoint=self.azure_endpoint,
277
289
  azure_deployment=self.deployment_name,
@@ -1,9 +1,10 @@
1
1
  from typing import Optional, Tuple, Union, Dict
2
2
  from openai import OpenAI, AsyncOpenAI
3
- from pydantic import BaseModel
3
+ from pydantic import BaseModel, SecretStr
4
4
 
5
- from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
5
+ from deepeval.config.settings import get_settings
6
6
  from deepeval.models.llms.utils import trim_and_load_json
7
+ from deepeval.models.utils import require_secret_api_key
7
8
  from deepeval.models import DeepEvalBaseLLM
8
9
  from deepeval.models.retry_policy import (
9
10
  create_retry_decorator,
@@ -36,25 +37,27 @@ class DeepSeekModel(DeepEvalBaseLLM):
36
37
  generation_kwargs: Optional[Dict] = None,
37
38
  **kwargs,
38
39
  ):
39
- model_name = model or KEY_FILE_HANDLER.fetch_data(
40
- ModelKeyValues.DEEPSEEK_MODEL_NAME
41
- )
40
+ settings = get_settings()
41
+
42
+ model_name = model or settings.DEEPSEEK_MODEL_NAME
42
43
  if model_name not in model_pricing:
43
44
  raise ValueError(
44
45
  f"Invalid model. Available DeepSeek models: {', '.join(model_pricing.keys())}"
45
46
  )
46
- temperature_from_key = KEY_FILE_HANDLER.fetch_data(
47
- ModelKeyValues.TEMPERATURE
48
- )
47
+ temperature_from_key = settings.TEMPERATURE
49
48
  if temperature_from_key is None:
50
49
  self.temperature = temperature
51
50
  else:
52
51
  self.temperature = float(temperature_from_key)
53
52
  if self.temperature < 0:
54
53
  raise ValueError("Temperature must be >= 0.")
55
- self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
56
- ModelKeyValues.DEEPSEEK_API_KEY
57
- )
54
+
55
+ if api_key is not None:
56
+ # keep it secret, keep it safe from serializings, logging and alike
57
+ self.api_key: SecretStr | None = SecretStr(api_key)
58
+ else:
59
+ self.api_key = settings.DEEPSEEK_API_KEY
60
+
58
61
  self.base_url = "https://api.deepseek.com"
59
62
  self.kwargs = kwargs
60
63
  self.generation_kwargs = generation_kwargs or {}
@@ -167,8 +170,15 @@ class DeepSeekModel(DeepEvalBaseLLM):
167
170
  return kwargs
168
171
 
169
172
  def _build_client(self, cls):
173
+ api_key = require_secret_api_key(
174
+ self.api_key,
175
+ provider_label="DeepSeek",
176
+ env_var_name="DEEPSEEK_API_KEY",
177
+ param_hint="`api_key` to DeepSeekModel(...)",
178
+ )
179
+
170
180
  kw = dict(
171
- api_key=self.api_key,
181
+ api_key=api_key,
172
182
  base_url=self.base_url,
173
183
  **self._client_kwargs(),
174
184
  )