deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/test.py +1 -1
  3. deepeval/config/settings.py +102 -13
  4. deepeval/dataset/golden.py +54 -2
  5. deepeval/evaluate/configs.py +1 -1
  6. deepeval/evaluate/evaluate.py +16 -8
  7. deepeval/evaluate/execute.py +74 -27
  8. deepeval/evaluate/utils.py +26 -22
  9. deepeval/integrations/pydantic_ai/agent.py +19 -2
  10. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  11. deepeval/metrics/__init__.py +14 -12
  12. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  13. deepeval/metrics/answer_relevancy/template.py +188 -92
  14. deepeval/metrics/argument_correctness/template.py +2 -2
  15. deepeval/metrics/base_metric.py +2 -5
  16. deepeval/metrics/bias/template.py +3 -3
  17. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  18. deepeval/metrics/contextual_precision/template.py +115 -66
  19. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  20. deepeval/metrics/contextual_recall/template.py +106 -55
  21. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  22. deepeval/metrics/contextual_relevancy/template.py +87 -58
  23. deepeval/metrics/conversation_completeness/template.py +2 -2
  24. deepeval/metrics/conversational_dag/templates.py +4 -4
  25. deepeval/metrics/conversational_g_eval/template.py +4 -3
  26. deepeval/metrics/dag/templates.py +5 -5
  27. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  28. deepeval/metrics/faithfulness/schema.py +1 -1
  29. deepeval/metrics/faithfulness/template.py +200 -115
  30. deepeval/metrics/g_eval/utils.py +2 -2
  31. deepeval/metrics/hallucination/template.py +4 -4
  32. deepeval/metrics/indicator.py +4 -4
  33. deepeval/metrics/misuse/template.py +2 -2
  34. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  35. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  36. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  37. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  38. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  39. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  40. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  41. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  42. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  43. deepeval/metrics/non_advice/template.py +2 -2
  44. deepeval/metrics/pii_leakage/template.py +2 -2
  45. deepeval/metrics/prompt_alignment/template.py +4 -4
  46. deepeval/metrics/ragas.py +3 -3
  47. deepeval/metrics/role_violation/template.py +2 -2
  48. deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  49. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  50. deepeval/metrics/toxicity/template.py +4 -4
  51. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  52. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  53. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  54. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  55. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  56. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  57. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  58. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  59. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  60. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  61. deepeval/metrics/turn_faithfulness/template.py +218 -0
  62. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  63. deepeval/metrics/turn_relevancy/template.py +2 -2
  64. deepeval/metrics/utils.py +39 -58
  65. deepeval/models/__init__.py +0 -12
  66. deepeval/models/base_model.py +16 -38
  67. deepeval/models/embedding_models/__init__.py +7 -0
  68. deepeval/models/embedding_models/azure_embedding_model.py +69 -32
  69. deepeval/models/embedding_models/local_embedding_model.py +39 -22
  70. deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
  71. deepeval/models/embedding_models/openai_embedding_model.py +50 -15
  72. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  73. deepeval/models/llms/anthropic_model.py +53 -20
  74. deepeval/models/llms/azure_model.py +140 -43
  75. deepeval/models/llms/deepseek_model.py +38 -23
  76. deepeval/models/llms/gemini_model.py +222 -103
  77. deepeval/models/llms/grok_model.py +39 -27
  78. deepeval/models/llms/kimi_model.py +39 -23
  79. deepeval/models/llms/litellm_model.py +103 -45
  80. deepeval/models/llms/local_model.py +35 -22
  81. deepeval/models/llms/ollama_model.py +129 -17
  82. deepeval/models/llms/openai_model.py +151 -50
  83. deepeval/models/llms/portkey_model.py +149 -0
  84. deepeval/models/llms/utils.py +5 -3
  85. deepeval/models/retry_policy.py +17 -14
  86. deepeval/models/utils.py +94 -4
  87. deepeval/optimizer/__init__.py +5 -0
  88. deepeval/optimizer/algorithms/__init__.py +6 -0
  89. deepeval/optimizer/algorithms/base.py +29 -0
  90. deepeval/optimizer/algorithms/configs.py +18 -0
  91. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  92. deepeval/optimizer/algorithms/copro/copro.py +836 -0
  93. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  94. deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
  95. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  96. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  97. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  98. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  99. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  100. deepeval/optimizer/algorithms/simba/simba.py +999 -0
  101. deepeval/optimizer/algorithms/simba/types.py +15 -0
  102. deepeval/optimizer/configs.py +31 -0
  103. deepeval/optimizer/policies.py +227 -0
  104. deepeval/optimizer/prompt_optimizer.py +263 -0
  105. deepeval/optimizer/rewriter/__init__.py +5 -0
  106. deepeval/optimizer/rewriter/rewriter.py +124 -0
  107. deepeval/optimizer/rewriter/utils.py +214 -0
  108. deepeval/optimizer/scorer/__init__.py +5 -0
  109. deepeval/optimizer/scorer/base.py +86 -0
  110. deepeval/optimizer/scorer/scorer.py +316 -0
  111. deepeval/optimizer/scorer/utils.py +30 -0
  112. deepeval/optimizer/types.py +148 -0
  113. deepeval/optimizer/utils.py +480 -0
  114. deepeval/prompt/prompt.py +7 -6
  115. deepeval/test_case/__init__.py +1 -3
  116. deepeval/test_case/api.py +12 -10
  117. deepeval/test_case/conversational_test_case.py +19 -1
  118. deepeval/test_case/llm_test_case.py +152 -1
  119. deepeval/test_case/utils.py +4 -8
  120. deepeval/test_run/api.py +15 -14
  121. deepeval/test_run/cache.py +2 -0
  122. deepeval/test_run/test_run.py +9 -4
  123. deepeval/tracing/patchers.py +9 -4
  124. deepeval/tracing/tracing.py +2 -2
  125. deepeval/utils.py +89 -0
  126. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  127. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
  128. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  129. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  130. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  131. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  132. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  133. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  134. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  135. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  136. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  137. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  138. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  139. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  140. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  141. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  142. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  143. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  144. deepeval/models/mlllms/__init__.py +0 -4
  145. deepeval/models/mlllms/azure_model.py +0 -334
  146. deepeval/models/mlllms/gemini_model.py +0 -284
  147. deepeval/models/mlllms/ollama_model.py +0 -144
  148. deepeval/models/mlllms/openai_model.py +0 -258
  149. deepeval/test_case/mllm_test_case.py +0 -170
  150. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  152. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  153. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  154. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  155. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  156. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
deepeval/metrics/utils.py CHANGED
@@ -2,16 +2,14 @@ import inspect
2
2
  import json
3
3
  import re
4
4
  import sys
5
- import itertools
6
5
  from typing import Any, Dict, Optional, List, Union, Tuple
7
6
 
8
7
  from deepeval.errors import (
9
8
  MissingTestCaseParamsError,
10
- MismatchedTestCaseInputsError,
11
9
  )
10
+ from deepeval.utils import convert_to_multi_modal_array
12
11
  from deepeval.models import (
13
12
  DeepEvalBaseLLM,
14
- DeepEvalBaseMLLM,
15
13
  GPTModel,
16
14
  AnthropicModel,
17
15
  AzureOpenAIModel,
@@ -22,10 +20,6 @@ from deepeval.models import (
22
20
  OllamaEmbeddingModel,
23
21
  LocalEmbeddingModel,
24
22
  GeminiModel,
25
- MultimodalOpenAIModel,
26
- MultimodalGeminiModel,
27
- MultimodalOllamaModel,
28
- MultimodalAzureOpenAIMLLMModel,
29
23
  AmazonBedrockModel,
30
24
  LiteLLMModel,
31
25
  KimiModel,
@@ -45,11 +39,8 @@ from deepeval.metrics import (
45
39
  )
46
40
  from deepeval.models.base_model import DeepEvalBaseEmbeddingModel
47
41
  from deepeval.test_case import (
48
- Turn,
49
42
  LLMTestCase,
50
43
  LLMTestCaseParams,
51
- MLLMTestCase,
52
- MLLMTestCaseParams,
53
44
  ConversationalTestCase,
54
45
  MLLMImage,
55
46
  Turn,
@@ -58,6 +49,13 @@ from deepeval.test_case import (
58
49
  TurnParams,
59
50
  )
60
51
 
52
+ MULTIMODAL_SUPPORTED_MODELS = [
53
+ GPTModel,
54
+ GeminiModel,
55
+ OllamaModel,
56
+ AzureOpenAIModel,
57
+ ]
58
+
61
59
 
62
60
  def copy_metrics(
63
61
  metrics: List[
@@ -201,7 +199,20 @@ def check_conversational_test_case_params(
201
199
  test_case_params: List[TurnParams],
202
200
  metric: BaseConversationalMetric,
203
201
  require_chatbot_role: bool = False,
202
+ model: Optional[DeepEvalBaseLLM] = None,
203
+ multimodal: Optional[bool] = False,
204
204
  ):
205
+ if multimodal:
206
+ if not model or not model.supports_multimodal():
207
+ if model and type(model) in MULTIMODAL_SUPPORTED_MODELS:
208
+ raise ValueError(
209
+ f"The evaluation model {model.name} does not support multimodal evaluations at the moment. Available multi-modal models for the {model.__class__.__name__} provider includes {', '.join(model.__class__.valid_multimodal_models)}."
210
+ )
211
+ else:
212
+ raise ValueError(
213
+ f"The evaluation model {model.name} does not support multimodal inputs, please use one of the following evaluation models: {', '.join([cls.__name__ for cls in MULTIMODAL_SUPPORTED_MODELS])}"
214
+ )
215
+
205
216
  if isinstance(test_case, ConversationalTestCase) is False:
206
217
  error_str = f"Unable to evaluate test cases that are not of type 'ConversationalTestCase' using the conversational '{metric.__name__}' metric."
207
218
  metric.error = error_str
@@ -289,15 +300,26 @@ def check_arena_test_case_params(
289
300
 
290
301
 
291
302
  def check_mllm_test_case_params(
292
- test_case: MLLMTestCase,
293
- test_case_params: List[MLLMTestCaseParams],
303
+ test_case: LLMTestCase,
304
+ test_case_params: List[LLMTestCaseParams],
294
305
  input_image_count: Optional[int],
295
306
  actual_output_image_count: Optional[int],
296
307
  metric: BaseMetric,
308
+ model: Optional[DeepEvalBaseLLM] = None,
297
309
  ):
310
+ if not model or not model.supports_multimodal():
311
+ if model and type(model) in MULTIMODAL_SUPPORTED_MODELS:
312
+ raise ValueError(
313
+ f"The evaluation model {model.name} does not support multimodal evaluations at the moment. Available multi-modal models for the {model.__class__.__name__} provider includes {', '.join(model.__class__.valid_multimodal_models)}."
314
+ )
315
+ else:
316
+ raise ValueError(
317
+ f"The evaluation model {model.name} does not support multimodal inputs, please use one of the following evaluation models: {', '.join([cls.__name__ for cls in MULTIMODAL_SUPPORTED_MODELS])}"
318
+ )
319
+
298
320
  if input_image_count:
299
321
  count = 0
300
- for ele in test_case.input:
322
+ for ele in convert_to_multi_modal_array(test_case.input):
301
323
  if isinstance(ele, MLLMImage):
302
324
  count += 1
303
325
  if count != input_image_count:
@@ -306,18 +328,13 @@ def check_mllm_test_case_params(
306
328
 
307
329
  if actual_output_image_count:
308
330
  count = 0
309
- for ele in test_case.actual_output:
331
+ for ele in convert_to_multi_modal_array(test_case.actual_output):
310
332
  if isinstance(ele, MLLMImage):
311
333
  count += 1
312
334
  if count != actual_output_image_count:
313
335
  error_str = f"Unable to evaluate test cases with '{actual_output_image_count}' output images using the '{metric.__name__}' metric. `{count}` found."
314
336
  raise ValueError(error_str)
315
337
 
316
- if isinstance(test_case, MLLMTestCase) is False:
317
- error_str = f"Unable to evaluate test cases that are not of type 'MLLMTestCase' using the '{metric.__name__}' metric."
318
- metric.error = error_str
319
- raise ValueError(error_str)
320
-
321
338
  missing_params = []
322
339
  for param in test_case_params:
323
340
  if getattr(test_case, param.value) is None:
@@ -339,8 +356,8 @@ def check_mllm_test_case_params(
339
356
 
340
357
 
341
358
  def check_mllm_test_cases_params(
342
- test_cases: List[MLLMTestCase],
343
- test_case_params: List[MLLMTestCaseParams],
359
+ test_cases: List[LLMTestCase],
360
+ test_case_params: List[LLMTestCaseParams],
344
361
  input_image_count: Optional[int],
345
362
  actual_output_image_count: Optional[int],
346
363
  metric: BaseMetric,
@@ -459,7 +476,7 @@ def initialize_model(
459
476
  elif should_use_local_model():
460
477
  return LocalModel(), True
461
478
  elif should_use_azure_openai():
462
- return AzureOpenAIModel(model_name=model), True
479
+ return AzureOpenAIModel(model=model), True
463
480
  elif should_use_moonshot_model():
464
481
  return KimiModel(model=model), True
465
482
  elif should_use_grok_model():
@@ -501,42 +518,6 @@ def is_native_model(
501
518
  ###############################################
502
519
 
503
520
 
504
- def initialize_multimodal_model(
505
- model: Optional[Union[str, DeepEvalBaseMLLM]] = None,
506
- ) -> Tuple[DeepEvalBaseLLM, bool]:
507
- """
508
- Returns a tuple of (initialized DeepEvalBaseMLLM, using_native_model boolean)
509
- """
510
- if is_native_mllm(model):
511
- return model, True
512
- if isinstance(model, DeepEvalBaseMLLM):
513
- return model, False
514
- if should_use_gemini_model():
515
- return MultimodalGeminiModel(), True
516
- if should_use_ollama_model():
517
- return MultimodalOllamaModel(), True
518
- elif should_use_azure_openai():
519
- return MultimodalAzureOpenAIMLLMModel(model_name=model), True
520
- elif isinstance(model, str) or model is None:
521
- return MultimodalOpenAIModel(model=model), True
522
- raise TypeError(
523
- f"Unsupported type for model: {type(model)}. Expected None, str, DeepEvalBaseMLLM, MultimodalOpenAIModel, MultimodalOllamaModel."
524
- )
525
-
526
-
527
- def is_native_mllm(
528
- model: Optional[Union[str, DeepEvalBaseLLM]] = None,
529
- ) -> bool:
530
- if (
531
- isinstance(model, MultimodalOpenAIModel)
532
- or isinstance(model, MultimodalOllamaModel)
533
- or isinstance(model, MultimodalGeminiModel)
534
- ):
535
- return True
536
- else:
537
- return False
538
-
539
-
540
521
  ###############################################
541
522
  # Embedding Model
542
523
  ###############################################
@@ -1,7 +1,6 @@
1
1
  from deepeval.models.base_model import (
2
2
  DeepEvalBaseModel,
3
3
  DeepEvalBaseLLM,
4
- DeepEvalBaseMLLM,
5
4
  DeepEvalBaseEmbeddingModel,
6
5
  )
7
6
  from deepeval.models.llms import (
@@ -17,12 +16,6 @@ from deepeval.models.llms import (
17
16
  GrokModel,
18
17
  DeepSeekModel,
19
18
  )
20
- from deepeval.models.mlllms import (
21
- MultimodalOpenAIModel,
22
- MultimodalOllamaModel,
23
- MultimodalGeminiModel,
24
- MultimodalAzureOpenAIMLLMModel,
25
- )
26
19
  from deepeval.models.embedding_models import (
27
20
  OpenAIEmbeddingModel,
28
21
  AzureOpenAIEmbeddingModel,
@@ -33,7 +26,6 @@ from deepeval.models.embedding_models import (
33
26
  __all__ = [
34
27
  "DeepEvalBaseModel",
35
28
  "DeepEvalBaseLLM",
36
- "DeepEvalBaseMLLM",
37
29
  "DeepEvalBaseEmbeddingModel",
38
30
  "GPTModel",
39
31
  "AzureOpenAIModel",
@@ -46,10 +38,6 @@ __all__ = [
46
38
  "KimiModel",
47
39
  "GrokModel",
48
40
  "DeepSeekModel",
49
- "MultimodalOpenAIModel",
50
- "MultimodalOllamaModel",
51
- "MultimodalGeminiModel",
52
- "MultimodalAzureOpenAIMLLMModel",
53
41
  "OpenAIEmbeddingModel",
54
42
  "AzureOpenAIEmbeddingModel",
55
43
  "LocalEmbeddingModel",
@@ -31,9 +31,9 @@ class DeepEvalBaseModel(ABC):
31
31
 
32
32
 
33
33
  class DeepEvalBaseLLM(ABC):
34
- def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
35
- self.model_name = parse_model_name(model_name)
36
- self.model = self.load_model(*args, **kwargs)
34
+ def __init__(self, model: Optional[str] = None, *args, **kwargs):
35
+ self.name = parse_model_name(model)
36
+ self.model = self.load_model()
37
37
 
38
38
  @abstractmethod
39
39
  def load_model(self, *args, **kwargs) -> "DeepEvalBaseLLM":
@@ -62,6 +62,13 @@ class DeepEvalBaseLLM(ABC):
62
62
  """
63
63
  pass
64
64
 
65
+ @abstractmethod
66
+ def get_model_name(self, *args, **kwargs) -> str:
67
+ return self.name
68
+
69
+ def supports_multimodal(self) -> bool:
70
+ return False
71
+
65
72
  def batch_generate(self, *args, **kwargs) -> List[str]:
66
73
  """Runs the model to output LLM responses.
67
74
 
@@ -72,43 +79,14 @@ class DeepEvalBaseLLM(ABC):
72
79
  "batch_generate is not implemented for this model"
73
80
  )
74
81
 
75
- @abstractmethod
76
- def get_model_name(self, *args, **kwargs) -> str:
77
- pass
78
-
79
-
80
- class DeepEvalBaseMLLM(ABC):
81
- def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
82
- self.model_name = parse_model_name(model_name)
83
-
84
- @abstractmethod
85
- def generate(self, *args, **kwargs) -> str:
86
- """Runs the model to output MLLM response.
87
-
88
- Returns:
89
- A string.
90
- """
91
- pass
92
-
93
- @abstractmethod
94
- async def a_generate(self, *args, **kwargs) -> str:
95
- """Runs the model to output MLLM response.
96
-
97
- Returns:
98
- A string.
99
- """
100
- pass
101
-
102
- @abstractmethod
103
- def get_model_name(self, *args, **kwargs) -> str:
104
- pass
82
+ def supports_multimodal(self):
83
+ return False
105
84
 
106
85
 
107
86
  class DeepEvalBaseEmbeddingModel(ABC):
108
- def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
109
- self.model_name = parse_model_name(model_name)
110
-
111
- self.model = self.load_model(*args, **kwargs)
87
+ def __init__(self, model: Optional[str] = None, *args, **kwargs):
88
+ self.name = parse_model_name(model)
89
+ self.model = self.load_model()
112
90
 
113
91
  @abstractmethod
114
92
  def load_model(self, *args, **kwargs) -> "DeepEvalBaseEmbeddingModel":
@@ -157,4 +135,4 @@ class DeepEvalBaseEmbeddingModel(ABC):
157
135
 
158
136
  @abstractmethod
159
137
  def get_model_name(self, *args, **kwargs) -> str:
160
- pass
138
+ return self.name
@@ -2,3 +2,10 @@ from .azure_embedding_model import AzureOpenAIEmbeddingModel
2
2
  from .openai_embedding_model import OpenAIEmbeddingModel
3
3
  from .local_embedding_model import LocalEmbeddingModel
4
4
  from .ollama_embedding_model import OllamaEmbeddingModel
5
+
6
+ __all__ = [
7
+ "AzureOpenAIEmbeddingModel",
8
+ "OpenAIEmbeddingModel",
9
+ "LocalEmbeddingModel",
10
+ "OllamaEmbeddingModel",
11
+ ]
@@ -1,55 +1,85 @@
1
1
  from typing import Dict, List, Optional
2
2
  from openai import AzureOpenAI, AsyncAzureOpenAI
3
- from deepeval.key_handler import (
4
- EmbeddingKeyValues,
5
- ModelKeyValues,
6
- KEY_FILE_HANDLER,
7
- )
3
+ from pydantic import SecretStr
4
+
5
+ from deepeval.config.settings import get_settings
8
6
  from deepeval.models import DeepEvalBaseEmbeddingModel
9
7
  from deepeval.models.retry_policy import (
10
8
  create_retry_decorator,
11
9
  sdk_retries_for,
12
10
  )
13
11
  from deepeval.constants import ProviderSlug as PS
12
+ from deepeval.models.utils import (
13
+ require_secret_api_key,
14
+ normalize_kwargs_and_extract_aliases,
15
+ )
14
16
 
15
17
 
16
18
  retry_azure = create_retry_decorator(PS.AZURE)
17
19
 
20
+ _ALIAS_MAP = {
21
+ "api_key": ["openai_api_key"],
22
+ "base_url": ["azure_endpoint"],
23
+ "deployment_name": ["azure_deployment"],
24
+ }
25
+
18
26
 
19
27
  class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
20
28
  def __init__(
21
29
  self,
22
- openai_api_key: Optional[str] = None,
23
- openai_api_version: Optional[str] = None,
24
- azure_endpoint: Optional[str] = None,
25
- azure_deployment: Optional[str] = None,
26
30
  model: Optional[str] = None,
31
+ api_key: Optional[str] = None,
32
+ base_url: Optional[str] = None,
33
+ deployment_name: Optional[str] = None,
34
+ openai_api_version: Optional[str] = None,
27
35
  generation_kwargs: Optional[Dict] = None,
28
- **client_kwargs,
36
+ **kwargs,
29
37
  ):
30
- self.openai_api_key = openai_api_key or KEY_FILE_HANDLER.fetch_data(
31
- ModelKeyValues.AZURE_OPENAI_API_KEY
38
+ normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
39
+ "AzureOpenAIEmbeddingModel",
40
+ kwargs,
41
+ _ALIAS_MAP,
32
42
  )
43
+
44
+ # re-map deprecated keywords to renamed positional args
45
+ if api_key is None and "api_key" in alias_values:
46
+ api_key = alias_values["api_key"]
47
+ if base_url is None and "base_url" in alias_values:
48
+ base_url = alias_values["base_url"]
49
+ if deployment_name is None and "deployment_name" in alias_values:
50
+ deployment_name = alias_values["deployment_name"]
51
+
52
+ settings = get_settings()
53
+
54
+ if api_key is not None:
55
+ # keep it secret, keep it safe from serialization, logging and the like
56
+ self.api_key: SecretStr | None = SecretStr(api_key)
57
+ else:
58
+ self.api_key = settings.AZURE_OPENAI_API_KEY
59
+
33
60
  self.openai_api_version = (
34
- openai_api_version
35
- or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
61
+ openai_api_version or settings.OPENAI_API_VERSION
36
62
  )
37
- self.azure_endpoint = azure_endpoint or KEY_FILE_HANDLER.fetch_data(
38
- ModelKeyValues.AZURE_OPENAI_ENDPOINT
63
+ self.base_url = (
64
+ base_url
65
+ or settings.AZURE_OPENAI_ENDPOINT
66
+ and str(settings.AZURE_OPENAI_ENDPOINT)
39
67
  )
40
- self.azure_deployment = azure_deployment or KEY_FILE_HANDLER.fetch_data(
41
- EmbeddingKeyValues.AZURE_EMBEDDING_DEPLOYMENT_NAME
68
+
69
+ self.deployment_name = (
70
+ deployment_name or settings.AZURE_EMBEDDING_DEPLOYMENT_NAME
42
71
  )
43
- self.client_kwargs = client_kwargs or {}
44
- self.model_name = model or self.azure_deployment
72
+ # Keep sanitized kwargs for client call to strip legacy keys
73
+ self.kwargs = normalized_kwargs
74
+ model = model or self.deployment_name
45
75
  self.generation_kwargs = generation_kwargs or {}
46
- super().__init__(self.model_name)
76
+ super().__init__(model)
47
77
 
48
78
  @retry_azure
49
79
  def embed_text(self, text: str) -> List[float]:
50
80
  client = self.load_model(async_mode=False)
51
81
  response = client.embeddings.create(
52
- input=text, model=self.model_name, **self.generation_kwargs
82
+ input=text, model=self.name, **self.generation_kwargs
53
83
  )
54
84
  return response.data[0].embedding
55
85
 
@@ -57,7 +87,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
57
87
  def embed_texts(self, texts: List[str]) -> List[List[float]]:
58
88
  client = self.load_model(async_mode=False)
59
89
  response = client.embeddings.create(
60
- input=texts, model=self.model_name, **self.generation_kwargs
90
+ input=texts, model=self.name, **self.generation_kwargs
61
91
  )
62
92
  return [item.embedding for item in response.data]
63
93
 
@@ -65,7 +95,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
65
95
  async def a_embed_text(self, text: str) -> List[float]:
66
96
  client = self.load_model(async_mode=True)
67
97
  response = await client.embeddings.create(
68
- input=text, model=self.model_name, **self.generation_kwargs
98
+ input=text, model=self.name, **self.generation_kwargs
69
99
  )
70
100
  return response.data[0].embedding
71
101
 
@@ -73,28 +103,32 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
73
103
  async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
74
104
  client = self.load_model(async_mode=True)
75
105
  response = await client.embeddings.create(
76
- input=texts, model=self.model_name, **self.generation_kwargs
106
+ input=texts, model=self.name, **self.generation_kwargs
77
107
  )
78
108
  return [item.embedding for item in response.data]
79
109
 
80
- def get_model_name(self) -> str:
81
- return self.model_name
82
-
83
110
  def load_model(self, async_mode: bool = False):
84
111
  if not async_mode:
85
112
  return self._build_client(AzureOpenAI)
86
113
  return self._build_client(AsyncAzureOpenAI)
87
114
 
88
115
  def _build_client(self, cls):
89
- client_kwargs = self.client_kwargs.copy()
116
+ api_key = require_secret_api_key(
117
+ self.api_key,
118
+ provider_label="AzureOpenAI",
119
+ env_var_name="AZURE_OPENAI_API_KEY",
120
+ param_hint="`api_key` to AzureOpenAIEmbeddingModel(...)",
121
+ )
122
+
123
+ client_kwargs = self.kwargs.copy()
90
124
  if not sdk_retries_for(PS.AZURE):
91
125
  client_kwargs["max_retries"] = 0
92
126
 
93
127
  client_init_kwargs = dict(
94
- api_key=self.openai_api_key,
128
+ api_key=api_key,
95
129
  api_version=self.openai_api_version,
96
- azure_endpoint=self.azure_endpoint,
97
- azure_deployment=self.azure_deployment,
130
+ azure_endpoint=self.base_url,
131
+ azure_deployment=self.deployment_name,
98
132
  **client_kwargs,
99
133
  )
100
134
  try:
@@ -105,3 +139,6 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
105
139
  client_init_kwargs.pop("max_retries", None)
106
140
  return cls(**client_init_kwargs)
107
141
  raise
142
+
143
+ def get_model_name(self):
144
+ return f"{self.name} (Azure)"
@@ -1,7 +1,11 @@
1
1
  from openai import OpenAI, AsyncOpenAI
2
2
  from typing import Dict, List, Optional
3
+ from pydantic import SecretStr
3
4
 
4
- from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
5
+ from deepeval.config.settings import get_settings
6
+ from deepeval.models.utils import (
7
+ require_secret_api_key,
8
+ )
5
9
  from deepeval.models import DeepEvalBaseEmbeddingModel
6
10
  from deepeval.models.retry_policy import (
7
11
  create_retry_decorator,
@@ -17,30 +21,36 @@ retry_local = create_retry_decorator(PS.LOCAL)
17
21
  class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
18
22
  def __init__(
19
23
  self,
24
+ model: Optional[str] = None,
20
25
  api_key: Optional[str] = None,
21
26
  base_url: Optional[str] = None,
22
- model: Optional[str] = None,
23
27
  generation_kwargs: Optional[Dict] = None,
24
- **client_kwargs,
28
+ **kwargs,
25
29
  ):
26
- self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
27
- EmbeddingKeyValues.LOCAL_EMBEDDING_API_KEY
28
- )
29
- self.base_url = base_url or KEY_FILE_HANDLER.fetch_data(
30
- EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
31
- )
32
- self.model_name = model or KEY_FILE_HANDLER.fetch_data(
33
- EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
30
+
31
+ settings = get_settings()
32
+ if api_key is not None:
33
+ # keep it secret, keep it safe from serialization, logging and the like
34
+ self.api_key: SecretStr | None = SecretStr(api_key)
35
+ else:
36
+ self.api_key = get_settings().LOCAL_EMBEDDING_API_KEY
37
+
38
+ self.base_url = (
39
+ base_url
40
+ or settings.LOCAL_EMBEDDING_BASE_URL
41
+ and str(settings.LOCAL_EMBEDDING_BASE_URL)
34
42
  )
35
- self.client_kwargs = client_kwargs or {}
43
+ model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
44
+ # Keep sanitized kwargs for client call to strip legacy keys
45
+ self.kwargs = kwargs
36
46
  self.generation_kwargs = generation_kwargs or {}
37
- super().__init__(self.model_name)
47
+ super().__init__(model)
38
48
 
39
49
  @retry_local
40
50
  def embed_text(self, text: str) -> List[float]:
41
51
  embedding_model = self.load_model()
42
52
  response = embedding_model.embeddings.create(
43
- model=self.model_name, input=[text], **self.generation_kwargs
53
+ model=self.name, input=[text], **self.generation_kwargs
44
54
  )
45
55
  return response.data[0].embedding
46
56
 
@@ -48,7 +58,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
48
58
  def embed_texts(self, texts: List[str]) -> List[List[float]]:
49
59
  embedding_model = self.load_model()
50
60
  response = embedding_model.embeddings.create(
51
- model=self.model_name, input=texts, **self.generation_kwargs
61
+ model=self.name, input=texts, **self.generation_kwargs
52
62
  )
53
63
  return [data.embedding for data in response.data]
54
64
 
@@ -56,7 +66,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
56
66
  async def a_embed_text(self, text: str) -> List[float]:
57
67
  embedding_model = self.load_model(async_mode=True)
58
68
  response = await embedding_model.embeddings.create(
59
- model=self.model_name, input=[text], **self.generation_kwargs
69
+ model=self.name, input=[text], **self.generation_kwargs
60
70
  )
61
71
  return response.data[0].embedding
62
72
 
@@ -64,7 +74,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
64
74
  async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
65
75
  embedding_model = self.load_model(async_mode=True)
66
76
  response = await embedding_model.embeddings.create(
67
- model=self.model_name, input=texts, **self.generation_kwargs
77
+ model=self.name, input=texts, **self.generation_kwargs
68
78
  )
69
79
  return [data.embedding for data in response.data]
70
80
 
@@ -72,21 +82,25 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
72
82
  # Model
73
83
  ###############################################
74
84
 
75
- def get_model_name(self):
76
- return self.model_name
77
-
78
85
  def load_model(self, async_mode: bool = False):
79
86
  if not async_mode:
80
87
  return self._build_client(OpenAI)
81
88
  return self._build_client(AsyncOpenAI)
82
89
 
83
90
  def _build_client(self, cls):
84
- client_kwargs = self.client_kwargs.copy()
91
+ api_key = require_secret_api_key(
92
+ self.api_key,
93
+ provider_label="OpenAI",
94
+ env_var_name="LOCAL_EMBEDDING_API_KEY",
95
+ param_hint="`api_key` to LocalEmbeddingModel(...)",
96
+ )
97
+
98
+ client_kwargs = self.kwargs.copy()
85
99
  if not sdk_retries_for(PS.LOCAL):
86
100
  client_kwargs["max_retries"] = 0
87
101
 
88
102
  client_init_kwargs = dict(
89
- api_key=self.api_key,
103
+ api_key=api_key,
90
104
  base_url=self.base_url,
91
105
  **client_kwargs,
92
106
  )
@@ -98,3 +112,6 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
98
112
  client_init_kwargs.pop("max_retries", None)
99
113
  return cls(**client_init_kwargs)
100
114
  raise
115
+
116
+ def get_model_name(self):
117
+ return f"{self.name} (Local Model)"