deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registries. It is provided for informational purposes only.
Files changed (156)
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/test.py +1 -1
  3. deepeval/config/settings.py +102 -13
  4. deepeval/dataset/golden.py +54 -2
  5. deepeval/evaluate/configs.py +1 -1
  6. deepeval/evaluate/evaluate.py +16 -8
  7. deepeval/evaluate/execute.py +74 -27
  8. deepeval/evaluate/utils.py +26 -22
  9. deepeval/integrations/pydantic_ai/agent.py +19 -2
  10. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  11. deepeval/metrics/__init__.py +14 -12
  12. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  13. deepeval/metrics/answer_relevancy/template.py +188 -92
  14. deepeval/metrics/argument_correctness/template.py +2 -2
  15. deepeval/metrics/base_metric.py +2 -5
  16. deepeval/metrics/bias/template.py +3 -3
  17. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  18. deepeval/metrics/contextual_precision/template.py +115 -66
  19. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  20. deepeval/metrics/contextual_recall/template.py +106 -55
  21. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  22. deepeval/metrics/contextual_relevancy/template.py +87 -58
  23. deepeval/metrics/conversation_completeness/template.py +2 -2
  24. deepeval/metrics/conversational_dag/templates.py +4 -4
  25. deepeval/metrics/conversational_g_eval/template.py +4 -3
  26. deepeval/metrics/dag/templates.py +5 -5
  27. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  28. deepeval/metrics/faithfulness/schema.py +1 -1
  29. deepeval/metrics/faithfulness/template.py +200 -115
  30. deepeval/metrics/g_eval/utils.py +2 -2
  31. deepeval/metrics/hallucination/template.py +4 -4
  32. deepeval/metrics/indicator.py +4 -4
  33. deepeval/metrics/misuse/template.py +2 -2
  34. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  35. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  36. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  37. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  38. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  39. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  40. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  41. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  42. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  43. deepeval/metrics/non_advice/template.py +2 -2
  44. deepeval/metrics/pii_leakage/template.py +2 -2
  45. deepeval/metrics/prompt_alignment/template.py +4 -4
  46. deepeval/metrics/ragas.py +3 -3
  47. deepeval/metrics/role_violation/template.py +2 -2
  48. deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  49. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  50. deepeval/metrics/toxicity/template.py +4 -4
  51. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  52. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  53. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  54. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  55. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  56. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  57. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  58. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  59. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  60. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  61. deepeval/metrics/turn_faithfulness/template.py +218 -0
  62. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  63. deepeval/metrics/turn_relevancy/template.py +2 -2
  64. deepeval/metrics/utils.py +39 -58
  65. deepeval/models/__init__.py +0 -12
  66. deepeval/models/base_model.py +16 -38
  67. deepeval/models/embedding_models/__init__.py +7 -0
  68. deepeval/models/embedding_models/azure_embedding_model.py +69 -32
  69. deepeval/models/embedding_models/local_embedding_model.py +39 -22
  70. deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
  71. deepeval/models/embedding_models/openai_embedding_model.py +50 -15
  72. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  73. deepeval/models/llms/anthropic_model.py +53 -20
  74. deepeval/models/llms/azure_model.py +140 -43
  75. deepeval/models/llms/deepseek_model.py +38 -23
  76. deepeval/models/llms/gemini_model.py +222 -103
  77. deepeval/models/llms/grok_model.py +39 -27
  78. deepeval/models/llms/kimi_model.py +39 -23
  79. deepeval/models/llms/litellm_model.py +103 -45
  80. deepeval/models/llms/local_model.py +35 -22
  81. deepeval/models/llms/ollama_model.py +129 -17
  82. deepeval/models/llms/openai_model.py +151 -50
  83. deepeval/models/llms/portkey_model.py +149 -0
  84. deepeval/models/llms/utils.py +5 -3
  85. deepeval/models/retry_policy.py +17 -14
  86. deepeval/models/utils.py +94 -4
  87. deepeval/optimizer/__init__.py +5 -0
  88. deepeval/optimizer/algorithms/__init__.py +6 -0
  89. deepeval/optimizer/algorithms/base.py +29 -0
  90. deepeval/optimizer/algorithms/configs.py +18 -0
  91. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  92. deepeval/optimizer/algorithms/copro/copro.py +836 -0
  93. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  94. deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
  95. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  96. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  97. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  98. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  99. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  100. deepeval/optimizer/algorithms/simba/simba.py +999 -0
  101. deepeval/optimizer/algorithms/simba/types.py +15 -0
  102. deepeval/optimizer/configs.py +31 -0
  103. deepeval/optimizer/policies.py +227 -0
  104. deepeval/optimizer/prompt_optimizer.py +263 -0
  105. deepeval/optimizer/rewriter/__init__.py +5 -0
  106. deepeval/optimizer/rewriter/rewriter.py +124 -0
  107. deepeval/optimizer/rewriter/utils.py +214 -0
  108. deepeval/optimizer/scorer/__init__.py +5 -0
  109. deepeval/optimizer/scorer/base.py +86 -0
  110. deepeval/optimizer/scorer/scorer.py +316 -0
  111. deepeval/optimizer/scorer/utils.py +30 -0
  112. deepeval/optimizer/types.py +148 -0
  113. deepeval/optimizer/utils.py +480 -0
  114. deepeval/prompt/prompt.py +7 -6
  115. deepeval/test_case/__init__.py +1 -3
  116. deepeval/test_case/api.py +12 -10
  117. deepeval/test_case/conversational_test_case.py +19 -1
  118. deepeval/test_case/llm_test_case.py +152 -1
  119. deepeval/test_case/utils.py +4 -8
  120. deepeval/test_run/api.py +15 -14
  121. deepeval/test_run/cache.py +2 -0
  122. deepeval/test_run/test_run.py +9 -4
  123. deepeval/tracing/patchers.py +9 -4
  124. deepeval/tracing/tracing.py +2 -2
  125. deepeval/utils.py +89 -0
  126. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  127. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
  128. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  129. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  130. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  131. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  132. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  133. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  134. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  135. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  136. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  137. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  138. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  139. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  140. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  141. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  142. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  143. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  144. deepeval/models/mlllms/__init__.py +0 -4
  145. deepeval/models/mlllms/azure_model.py +0 -334
  146. deepeval/models/mlllms/gemini_model.py +0 -284
  147. deepeval/models/mlllms/ollama_model.py +0 -144
  148. deepeval/models/mlllms/openai_model.py +0 -258
  149. deepeval/test_case/mllm_test_case.py +0 -170
  150. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  152. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  153. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  154. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  155. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  156. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/kimi_model.py
@@ -1,13 +1,16 @@
 from typing import Optional, Tuple, Union, Dict
 from openai import OpenAI, AsyncOpenAI
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr

+from deepeval.config.settings import get_settings
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS

@@ -73,35 +76,39 @@ model_pricing = {
 class KimiModel(DeepEvalBaseLLM):
     def __init__(
         self,
-        api_key: Optional[str] = None,
         model: Optional[str] = None,
+        api_key: Optional[str] = None,
         temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        model_name = model or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.MOONSHOT_MODEL_NAME
-        )
-        if model_name not in model_pricing:
+        settings = get_settings()
+
+        model = model or settings.MOONSHOT_MODEL_NAME
+        if model not in model_pricing:
             raise ValueError(
                 f"Invalid model. Available Moonshot models: {', '.join(model_pricing.keys())}"
             )
-        temperature_from_key = KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.TEMPERATURE
-        )
+
+        temperature_from_key = settings.TEMPERATURE
         if temperature_from_key is None:
             self.temperature = temperature
         else:
             self.temperature = float(temperature_from_key)
         if self.temperature < 0:
             raise ValueError("Temperature must be >= 0.")
-        self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.MOONSHOT_API_KEY
-        )
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serialization, logging and the like
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.MOONSHOT_API_KEY
+
         self.base_url = "https://api.moonshot.cn/v1"
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(model_name)
+        super().__init__(model)

     ###############################################
     # Other generate functions
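The hunk above shows the migration that repeats throughout this release: per-key reads via KEY_FILE_HANDLER.fetch_data(ModelKeyValues...) are replaced by attribute access on a central settings object, and explicitly passed keys are wrapped in SecretStr. A minimal sketch of the new access pattern, using only names visible in this diff (the SecretStr | None type of key fields is inferred from the annotation above, not verified against the wheel):

# Before (3.7.3): each value read from the key file by enum member
# model = KEY_FILE_HANDLER.fetch_data(ModelKeyValues.MOONSHOT_MODEL_NAME)

# After (3.7.5): one settings object, plain attribute access
from deepeval.config.settings import get_settings

settings = get_settings()
model = settings.MOONSHOT_MODEL_NAME  # presumably None when unset
api_key = settings.MOONSHOT_API_KEY   # inferred: SecretStr | None, masked in logs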
@@ -111,10 +118,11 @@ class KimiModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=False)
-        if schema and self.model_name in json_mode_models:
+        if schema and self.name in json_mode_models:
             completion = client.chat.completions.create(
-                model=self.model_name,
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,
@@ -130,7 +138,7 @@ class KimiModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost

         completion = client.chat.completions.create(
-            model=self.model_name,
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             **self.generation_kwargs,
         )
@@ -149,10 +157,11 @@ class KimiModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=True)
-        if schema and self.model_name in json_mode_models:
+        if schema and self.name in json_mode_models:
             completion = await client.chat.completions.create(
-                model=self.model_name,
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,
@@ -168,7 +177,7 @@ class KimiModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost

         completion = await client.chat.completions.create(
-            model=self.model_name,
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             **self.generation_kwargs,
         )
@@ -192,7 +201,7 @@ class KimiModel(DeepEvalBaseLLM):
         input_tokens: int,
         output_tokens: int,
     ) -> float:
-        pricing = model_pricing.get(self.model_name, model_pricing)
+        pricing = model_pricing.get(self.name, model_pricing)
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost
@@ -218,8 +227,15 @@ class KimiModel(DeepEvalBaseLLM):
         return kwargs

     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="Kimi",
+            env_var_name="MOONSHOT_API_KEY",
+            param_hint="`api_key` to KimiModel(...)",
+        )
+
         kw = dict(
-            api_key=self.api_key,
+            api_key=api_key,
             base_url=self.base_url,
             **self._client_kwargs(),
         )
@@ -233,4 +249,4 @@ class KimiModel(DeepEvalBaseLLM):
             raise

     def get_model_name(self):
-        return f"{self.model_name}"
+        return f"{self.name} (KIMI)"
deepeval/models/llms/litellm_model.py
@@ -1,6 +1,6 @@
-from typing import Optional, Tuple, Union, Dict, List, Any
-from pydantic import BaseModel
 import logging
+from typing import Optional, Tuple, Union, Dict, List, Any
+from pydantic import BaseModel, SecretStr
 from tenacity import (
     retry,
     stop_after_attempt,
@@ -8,11 +8,14 @@ from tenacity import (
     wait_exponential_jitter,
     RetryCallState,
 )
-import os

+from deepeval.config.settings import get_settings
+from deepeval.models.utils import (
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER


 def log_retry_error(retry_state: RetryCallState):
@@ -27,6 +30,10 @@ retryable_exceptions = (
     Exception,  # LiteLLM handles specific exceptions internally
 )

+_ALIAS_MAP = {
+    "base_url": ["api_base"],
+}
+

 class LiteLLMModel(DeepEvalBaseLLM):
     EXP_BASE: int = 2
@@ -39,47 +46,66 @@ class LiteLLMModel(DeepEvalBaseLLM):
         self,
         model: Optional[str] = None,
         api_key: Optional[str] = None,
-        api_base: Optional[str] = None,
+        base_url: Optional[str] = None,
         temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        from litellm import completion, acompletion, get_llm_provider

-        # Get model name from parameter or key file
-        model_name = model or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LITELLM_MODEL_NAME
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "LiteLLMModel",
+            kwargs,
+            _ALIAS_MAP,
         )
-        if not model_name:
+
+        # re-map deprecated keywords to renamed positional args
+        if base_url is None and "base_url" in alias_values:
+            base_url = alias_values["base_url"]
+
+        settings = get_settings()
+        # Get model name from parameter or settings
+        model = model or settings.LITELLM_MODEL_NAME
+        if not model:
             raise ValueError(
                 "Model name must be provided either through parameter or set-litellm command"
             )

-        # Get API key from parameter, key file, or environment variable
-        self.api_key = (
-            api_key
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.LITELLM_API_KEY)
-            or os.getenv("LITELLM_PROXY_API_KEY")
-            or os.getenv("OPENAI_API_KEY")
-            or os.getenv("ANTHROPIC_API_KEY")
-            or os.getenv("GOOGLE_API_KEY")
-        )
+        # Get API key from parameter, or settings
+        if api_key is not None:
+            # keep it secret, keep it safe from serialization, logging and the like
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = (
+                settings.LITELLM_API_KEY
+                or settings.LITELLM_PROXY_API_KEY
+                or settings.OPENAI_API_KEY
+                or settings.ANTHROPIC_API_KEY
+                or settings.GOOGLE_API_KEY
+            )

         # Get API base from parameter, key file, or environment variable
-        self.api_base = (
-            api_base
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.LITELLM_API_BASE)
-            or os.getenv("LITELLM_API_BASE")
-            or os.getenv("LITELLM_PROXY_API_BASE")
+        self.base_url = (
+            base_url
+            or (
+                str(settings.LITELLM_API_BASE)
+                if settings.LITELLM_API_BASE is not None
+                else None
+            )
+            or (
+                str(settings.LITELLM_PROXY_API_BASE)
+                if settings.LITELLM_PROXY_API_BASE is not None
+                else None
+            )
         )

         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
-        self.kwargs = kwargs
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
         self.evaluation_cost = 0.0  # Initialize cost to 0.0
-        super().__init__(model_name)
+        super().__init__(model)

     @retry(
         wait=wait_exponential_jitter(
@@ -92,18 +118,25 @@ class LiteLLMModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Union[str, Dict, Tuple[str, float]]:
+
        from litellm import completion

        completion_params = {
-            "model": self.model_name,
+            "model": self.name,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": self.temperature,
        }

        if self.api_key:
-            completion_params["api_key"] = self.api_key
-        if self.api_base:
-            completion_params["api_base"] = self.api_base
+            api_key = require_secret_api_key(
+                self.api_key,
+                provider_label="LiteLLM",
+                env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+                param_hint="`api_key` to LiteLLMModel(...)",
+            )
+            completion_params["api_key"] = api_key
+        if self.base_url:
+            completion_params["api_base"] = self.base_url

        # Add schema if provided
        if schema:
@@ -141,18 +174,25 @@ class LiteLLMModel(DeepEvalBaseLLM):
    async def a_generate(
        self, prompt: str, schema: Optional[BaseModel] = None
    ) -> Union[str, Dict, Tuple[str, float]]:
+
        from litellm import acompletion

        completion_params = {
-            "model": self.model_name,
+            "model": self.name,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": self.temperature,
        }

        if self.api_key:
-            completion_params["api_key"] = self.api_key
-        if self.api_base:
-            completion_params["api_base"] = self.api_base
+            api_key = require_secret_api_key(
+                self.api_key,
+                provider_label="LiteLLM",
+                env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+                param_hint="`api_key` to LiteLLMModel(...)",
+            )
+            completion_params["api_key"] = api_key
+        if self.base_url:
+            completion_params["api_base"] = self.base_url

        # Add schema if provided
        if schema:
@@ -195,12 +235,18 @@ class LiteLLMModel(DeepEvalBaseLLM):
        from litellm import completion

        try:
+            api_key = require_secret_api_key(
+                self.api_key,
+                provider_label="LiteLLM",
+                env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+                param_hint="`api_key` to LiteLLMModel(...)",
+            )
            completion_params = {
-                "model": self.model_name,
+                "model": self.name,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": self.temperature,
-                "api_key": self.api_key,
-                "api_base": self.api_base,
+                "api_key": api_key,
+                "api_base": self.base_url,
                "logprobs": True,
                "top_logprobs": top_logprobs,
            }
@@ -230,12 +276,18 @@ class LiteLLMModel(DeepEvalBaseLLM):
        from litellm import acompletion

        try:
+            api_key = require_secret_api_key(
+                self.api_key,
+                provider_label="LiteLLM",
+                env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+                param_hint="`api_key` to LiteLLMModel(...)",
+            )
            completion_params = {
-                "model": self.model_name,
+                "model": self.name,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": self.temperature,
-                "api_key": self.api_key,
-                "api_base": self.api_base,
+                "api_key": api_key,
+                "api_base": self.base_url,
                "logprobs": True,
                "top_logprobs": top_logprobs,
            }
@@ -263,13 +315,19 @@ class LiteLLMModel(DeepEvalBaseLLM):
        from litellm import completion

        try:
+            api_key = require_secret_api_key(
+                self.api_key,
+                provider_label="LiteLLM",
+                env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+                param_hint="`api_key` to LiteLLMModel(...)",
+            )
            completion_params = {
-                "model": self.model_name,
+                "model": self.name,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": temperature,
                "n": n,
-                "api_key": self.api_key,
-                "api_base": self.api_base,
+                "api_key": api_key,
+                "api_base": self.base_url,
            }
            completion_params.update(self.kwargs)

@@ -315,8 +373,8 @@ class LiteLLMModel(DeepEvalBaseLLM):
    def get_model_name(self) -> str:
        from litellm import get_llm_provider

-        provider = get_llm_provider(self.model_name)
-        return f"{self.model_name} ({provider})"
+        provider = get_llm_provider(self.name)
+        return f"{self.name} ({provider})"

    def load_model(self, async_mode: bool = False):
        """
deepeval/models/llms/local_model.py
@@ -1,15 +1,18 @@
 from typing import Optional, Tuple, Union, Dict
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr
 from openai import OpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletion

+from deepeval.config.settings import get_settings
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.constants import ProviderSlug as PS

@@ -21,31 +24,35 @@ class LocalModel(DeepEvalBaseLLM):
     def __init__(
         self,
         model: Optional[str] = None,
-        base_url: Optional[str] = None,
         api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
         temperature: float = 0,
         format: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        model_name = model or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_NAME
-        )
-        self.local_model_api_key = api_key or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_API_KEY
-        )
-        self.base_url = base_url or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_BASE_URL
-        )
-        self.format = format or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_FORMAT
+        settings = get_settings()
+
+        model = model or settings.LOCAL_MODEL_NAME
+        if api_key is not None:
+            # keep it secret, keep it safe from serialization, logging and the like
+            self.local_model_api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.local_model_api_key = settings.LOCAL_MODEL_API_KEY
+
+        self.base_url = (
+            base_url
+            or settings.LOCAL_MODEL_BASE_URL
+            and str(settings.LOCAL_MODEL_BASE_URL)
         )
+        self.format = format or settings.LOCAL_MODEL_FORMAT
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(model_name)
+        super().__init__(model)

     ###############################################
     # Other generate functions
@@ -55,9 +62,10 @@ class LocalModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=False)
         response: ChatCompletion = client.chat.completions.create(
-            model=self.model_name,
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -74,9 +82,10 @@ class LocalModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=True)
         response: ChatCompletion = await client.chat.completions.create(
-            model=self.model_name,
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -94,10 +103,7 @@ class LocalModel(DeepEvalBaseLLM):
     ###############################################

     def get_model_name(self):
-        model_name = KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_NAME
-        )
-        return f"{model_name} (Local Model)"
+        return f"{self.name} (Local Model)"

     def load_model(self, async_mode: bool = False):
         if not async_mode:
@@ -115,8 +121,15 @@ class LocalModel(DeepEvalBaseLLM):
         return kwargs

     def _build_client(self, cls):
+        local_model_api_key = require_secret_api_key(
+            self.local_model_api_key,
+            provider_label="Local",
+            env_var_name="LOCAL_MODEL_API_KEY",
+            param_hint="`api_key` to LocalModel(...)",
+        )
+
         kw = dict(
-            api_key=self.local_model_api_key,
+            api_key=local_model_api_key,
             base_url=self.base_url,
             **self._client_kwargs(),
         )
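All three providers route key unwrapping through the new require_secret_api_key helper in deepeval/models/utils.py. Its implementation is not shown in these hunks; the following is a hypothetical mirror of its contract as the call sites above use it (function name, logic, and message wording are illustrative only):

from typing import Optional
from pydantic import SecretStr

def require_secret_api_key_sketch(
    key: Optional[SecretStr],
    *,
    provider_label: str,
    env_var_name: str,
    param_hint: str,
) -> str:
    # unwrap the SecretStr, or fail fast with a configuration hint
    if key is None or not key.get_secret_value():
        raise ValueError(
            f"No API key found for {provider_label}. "
            f"Set {env_var_name} or pass {param_hint}."
        )
    return key.get_secret_value()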