deepeval 3.7.3__py3-none-any.whl → 3.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/test.py +1 -1
  3. deepeval/config/settings.py +102 -13
  4. deepeval/evaluate/configs.py +1 -1
  5. deepeval/evaluate/execute.py +4 -1
  6. deepeval/metrics/answer_relevancy/template.py +4 -4
  7. deepeval/metrics/argument_correctness/template.py +2 -2
  8. deepeval/metrics/bias/template.py +3 -3
  9. deepeval/metrics/contextual_precision/template.py +6 -6
  10. deepeval/metrics/contextual_recall/template.py +2 -2
  11. deepeval/metrics/contextual_relevancy/template.py +3 -3
  12. deepeval/metrics/conversation_completeness/template.py +2 -2
  13. deepeval/metrics/conversational_dag/templates.py +4 -4
  14. deepeval/metrics/conversational_g_eval/template.py +4 -3
  15. deepeval/metrics/dag/templates.py +4 -4
  16. deepeval/metrics/faithfulness/template.py +4 -4
  17. deepeval/metrics/hallucination/template.py +4 -4
  18. deepeval/metrics/misuse/template.py +2 -2
  19. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +7 -7
  20. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +6 -6
  21. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +2 -2
  22. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +3 -3
  23. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +9 -9
  24. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +4 -4
  25. deepeval/metrics/non_advice/template.py +2 -2
  26. deepeval/metrics/pii_leakage/template.py +2 -2
  27. deepeval/metrics/prompt_alignment/template.py +4 -4
  28. deepeval/metrics/role_violation/template.py +2 -2
  29. deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  30. deepeval/metrics/toxicity/template.py +4 -4
  31. deepeval/metrics/turn_relevancy/template.py +2 -2
  32. deepeval/models/embedding_models/azure_embedding_model.py +28 -15
  33. deepeval/models/embedding_models/local_embedding_model.py +23 -10
  34. deepeval/models/embedding_models/ollama_embedding_model.py +8 -6
  35. deepeval/models/embedding_models/openai_embedding_model.py +18 -2
  36. deepeval/models/llms/anthropic_model.py +17 -5
  37. deepeval/models/llms/azure_model.py +30 -18
  38. deepeval/models/llms/deepseek_model.py +22 -12
  39. deepeval/models/llms/gemini_model.py +120 -87
  40. deepeval/models/llms/grok_model.py +23 -16
  41. deepeval/models/llms/kimi_model.py +23 -12
  42. deepeval/models/llms/litellm_model.py +63 -25
  43. deepeval/models/llms/local_model.py +26 -18
  44. deepeval/models/llms/ollama_model.py +17 -7
  45. deepeval/models/llms/openai_model.py +22 -17
  46. deepeval/models/llms/portkey_model.py +132 -0
  47. deepeval/models/mlllms/azure_model.py +28 -19
  48. deepeval/models/mlllms/gemini_model.py +102 -73
  49. deepeval/models/mlllms/ollama_model.py +40 -9
  50. deepeval/models/mlllms/openai_model.py +65 -14
  51. deepeval/models/utils.py +48 -3
  52. deepeval/optimization/__init__.py +13 -0
  53. deepeval/optimization/adapters/__init__.py +2 -0
  54. deepeval/optimization/adapters/deepeval_scoring_adapter.py +588 -0
  55. deepeval/optimization/aggregates.py +14 -0
  56. deepeval/optimization/configs.py +34 -0
  57. deepeval/optimization/copro/configs.py +31 -0
  58. deepeval/optimization/copro/loop.py +837 -0
  59. deepeval/optimization/gepa/__init__.py +7 -0
  60. deepeval/optimization/gepa/configs.py +115 -0
  61. deepeval/optimization/gepa/loop.py +677 -0
  62. deepeval/optimization/miprov2/configs.py +134 -0
  63. deepeval/optimization/miprov2/loop.py +785 -0
  64. deepeval/optimization/mutations/__init__.py +0 -0
  65. deepeval/optimization/mutations/prompt_rewriter.py +458 -0
  66. deepeval/optimization/policies/__init__.py +16 -0
  67. deepeval/optimization/policies/selection.py +166 -0
  68. deepeval/optimization/policies/tie_breaker.py +67 -0
  69. deepeval/optimization/prompt_optimizer.py +462 -0
  70. deepeval/optimization/simba/__init__.py +0 -0
  71. deepeval/optimization/simba/configs.py +33 -0
  72. deepeval/optimization/simba/loop.py +983 -0
  73. deepeval/optimization/simba/types.py +15 -0
  74. deepeval/optimization/types.py +361 -0
  75. deepeval/optimization/utils.py +598 -0
  76. deepeval/prompt/prompt.py +10 -5
  77. deepeval/test_run/cache.py +2 -0
  78. deepeval/test_run/test_run.py +6 -1
  79. deepeval/utils.py +24 -0
  80. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/METADATA +1 -1
  81. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/RECORD +84 -59
  82. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/LICENSE.md +0 -0
  83. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/WHEEL +0 -0
  84. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/entry_points.txt +0 -0
@@ -1,18 +1,20 @@
1
- from typing import Optional, Tuple, List, Union
1
+ import base64
2
+ from typing import Optional, Tuple, List, Union, Dict
2
3
  from openai import OpenAI, AsyncOpenAI
3
4
  from openai.types.chat import ParsedChatCompletion
4
- from pydantic import BaseModel
5
+ from pydantic import BaseModel, SecretStr
5
6
  from io import BytesIO
6
- import base64
7
7
 
8
+ from deepeval.config.settings import get_settings
8
9
  from deepeval.models.llms.openai_model import (
9
10
  model_pricing,
10
11
  structured_outputs_models,
12
+ _request_timeout_seconds,
11
13
  )
12
14
  from deepeval.models import DeepEvalBaseMLLM
13
15
  from deepeval.models.llms.utils import trim_and_load_json
14
16
  from deepeval.test_case import MLLMImage
15
- from deepeval.models.utils import parse_model_name
17
+ from deepeval.models.utils import parse_model_name, require_secret_api_key
16
18
  from deepeval.models.retry_policy import (
17
19
  create_retry_decorator,
18
20
  sdk_retries_for,
@@ -60,17 +62,26 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
60
62
  *args,
61
63
  **kwargs,
62
64
  ):
65
+ settings = get_settings()
63
66
  model_name = None
64
67
  if isinstance(model, str):
65
68
  model_name = parse_model_name(model)
66
69
  if model_name not in valid_multimodal_gpt_models:
67
70
  raise ValueError(
68
- f"Invalid model. Available Multimodal GPT models: {', '.join(model for model in valid_multimodal_gpt_models)}"
71
+ f"Invalid model. Available Multimodal GPT models: "
72
+ f"{', '.join(model for model in valid_multimodal_gpt_models)}"
69
73
  )
74
+ elif settings.OPENAI_MODEL_NAME is not None:
75
+ model_name = settings.OPENAI_MODEL_NAME
70
76
  elif model is None:
71
77
  model_name = default_multimodal_gpt_model
72
78
 
73
- self._openai_api_key = _openai_api_key
79
+ if _openai_api_key is not None:
80
+ # keep it secret, keep it safe from serializings, logging and aolike
81
+ self._openai_api_key: SecretStr | None = SecretStr(_openai_api_key)
82
+ else:
83
+ self._openai_api_key = settings.OPENAI_API_KEY
84
+
74
85
  self.args = args
75
86
  self.kwargs = kwargs
76
87
 
@@ -86,7 +97,7 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
86
97
  multimodal_input: List[Union[str, MLLMImage]],
87
98
  schema: Optional[BaseModel] = None,
88
99
  ) -> Tuple[str, float]:
89
- client = OpenAI(api_key=self._openai_api_key)
100
+ client = self.load_model(async_mode=False)
90
101
  prompt = self.generate_prompt(multimodal_input)
91
102
 
92
103
  if schema:
@@ -123,7 +134,7 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
123
134
  multimodal_input: List[Union[str, MLLMImage]],
124
135
  schema: Optional[BaseModel] = None,
125
136
  ) -> Tuple[str, float]:
126
- client = AsyncOpenAI(api_key=self._openai_api_key)
137
+ client = self.load_model(async_mode=True)
127
138
  prompt = self.generate_prompt(multimodal_input)
128
139
 
129
140
  if schema:
@@ -247,12 +258,52 @@ class MultimodalOpenAIModel(DeepEvalBaseMLLM):
247
258
  base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
248
259
  return base64_encoded_image
249
260
 
250
- def _client(self, async_mode: bool = False):
251
- kw = {"api_key": self._openai_api_key}
252
- if not sdk_retries_for(PS.OPENAI):
253
- kw["max_retries"] = 0
254
- Client = AsyncOpenAI if async_mode else OpenAI
255
- return Client(**kw)
261
+ ###############################################
262
+ # Model
263
+ ###############################################
256
264
 
257
265
  def get_model_name(self):
258
266
  return self.model_name
267
+
268
+ def load_model(self, async_mode: bool = False):
269
+ Client = AsyncOpenAI if async_mode else OpenAI
270
+ return self._build_client(Client)
271
+
272
+ def _client_kwargs(self) -> Dict:
273
+ """
274
+ If Tenacity is managing retries, force OpenAI SDK retries off to avoid
275
+ double retries. If the user opts into SDK retries for 'openai' via
276
+ DEEPEVAL_SDK_RETRY_PROVIDERS, leave their retry settings as is.
277
+ """
278
+ kwargs: Dict = {}
279
+ if not sdk_retries_for(PS.OPENAI):
280
+ kwargs["max_retries"] = 0
281
+
282
+ if not kwargs.get("timeout"):
283
+ kwargs["timeout"] = _request_timeout_seconds()
284
+ return kwargs
285
+
286
+ def _build_client(self, cls):
287
+ api_key = require_secret_api_key(
288
+ self._openai_api_key,
289
+ provider_label="OpenAI",
290
+ env_var_name="OPENAI_API_KEY",
291
+ param_hint="`_openai_api_key` to MultimodalOpenAIModel(...)",
292
+ )
293
+
294
+ kw = dict(
295
+ api_key=api_key,
296
+ **self._client_kwargs(),
297
+ )
298
+ try:
299
+ return cls(**kw)
300
+ except TypeError as e:
301
+ # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
302
+ if "max_retries" in str(e):
303
+ kw.pop("max_retries", None)
304
+ return cls(**kw)
305
+ raise
306
+
307
+ def _client(self, async_mode: bool = False):
308
+ # Backwards-compat path for internal callers in this module
309
+ return self.load_model(async_mode=async_mode)
deepeval/models/utils.py CHANGED
@@ -1,4 +1,7 @@
1
1
  from typing import Optional
2
+ from pydantic import SecretStr
3
+
4
+ from deepeval.errors import DeepEvalError
2
5
 
3
6
 
4
7
  def parse_model_name(model_name: Optional[str] = None) -> str:
@@ -25,7 +28,49 @@ def parse_model_name(model_name: Optional[str] = None) -> str:
25
28
  if model_name is None:
26
29
  return None
27
30
 
28
- # if "/" in model_name:
29
- # _, parsed_model_name = model_name.split("/", 1)
30
- # return parsed_model_name
31
+ if "/" in model_name:
32
+ _, parsed_model_name = model_name.split("/", 1)
33
+ return parsed_model_name
31
34
  return model_name
35
+
36
+
37
+ def require_secret_api_key(
38
+ secret: Optional[SecretStr],
39
+ *,
40
+ provider_label: str,
41
+ env_var_name: str,
42
+ param_hint: str,
43
+ ) -> str:
44
+ """
45
+ Normalize and validate a provider API key stored as a SecretStr.
46
+
47
+ Args:
48
+ secret:
49
+ The SecretStr coming from Settings or an explicit constructor arg.
50
+ provider_label:
51
+ Human readable provider name for error messages, such as Anthropic, or OpenAI etc
52
+ env_var_name:
53
+ The environment variable backing this key
54
+ param_hint:
55
+ A short hint telling users how to pass the key explicitly
56
+
57
+ Returns:
58
+ The underlying API key string.
59
+
60
+ Raises:
61
+ DeepEvalError: if the key is missing or empty.
62
+ """
63
+ if secret is None:
64
+ raise DeepEvalError(
65
+ f"{provider_label} API key is not configured. "
66
+ f"Set {env_var_name} in your environment or pass "
67
+ f"{param_hint}."
68
+ )
69
+
70
+ api_key = secret.get_secret_value()
71
+ if not api_key:
72
+ raise DeepEvalError(
73
+ f"{provider_label} API key is empty. Please configure a valid key."
74
+ )
75
+
76
+ return api_key
@@ -0,0 +1,13 @@
1
+ from deepeval.optimization.prompt_optimizer import PromptOptimizer
2
+ from deepeval.optimization.configs import OptimizerDisplayConfig
3
+ from deepeval.optimization.gepa.loop import (
4
+ GEPARunner as GEPARunner,
5
+ GEPAConfig as GEPAConfig,
6
+ )
7
+
8
+ __all__ = [
9
+ "GEPARunner",
10
+ "GEPAConfig",
11
+ "PromptOptimizer",
12
+ "OptimizerDisplayConfig",
13
+ ]
@@ -0,0 +1,2 @@
1
+ # nothing yet
2
+ __all__ = []