deepeval 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. deepeval/_version.py +1 -1
  2. deepeval/dataset/golden.py +54 -2
  3. deepeval/evaluate/evaluate.py +16 -8
  4. deepeval/evaluate/execute.py +70 -26
  5. deepeval/evaluate/utils.py +26 -22
  6. deepeval/integrations/pydantic_ai/agent.py +19 -2
  7. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  8. deepeval/metrics/__init__.py +14 -12
  9. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  10. deepeval/metrics/answer_relevancy/template.py +188 -92
  11. deepeval/metrics/base_metric.py +2 -5
  12. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  13. deepeval/metrics/contextual_precision/template.py +115 -66
  14. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  15. deepeval/metrics/contextual_recall/template.py +106 -55
  16. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  17. deepeval/metrics/contextual_relevancy/template.py +87 -58
  18. deepeval/metrics/dag/templates.py +2 -2
  19. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  20. deepeval/metrics/faithfulness/schema.py +1 -1
  21. deepeval/metrics/faithfulness/template.py +200 -115
  22. deepeval/metrics/g_eval/utils.py +2 -2
  23. deepeval/metrics/indicator.py +4 -4
  24. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  25. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  26. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  27. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  28. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  29. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  30. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  31. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  32. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  33. deepeval/metrics/ragas.py +3 -3
  34. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  35. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  36. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  37. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  38. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  39. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  40. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  41. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  42. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  43. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  44. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  45. deepeval/metrics/turn_faithfulness/template.py +218 -0
  46. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  47. deepeval/metrics/utils.py +39 -58
  48. deepeval/models/__init__.py +0 -12
  49. deepeval/models/base_model.py +16 -38
  50. deepeval/models/embedding_models/__init__.py +7 -0
  51. deepeval/models/embedding_models/azure_embedding_model.py +52 -28
  52. deepeval/models/embedding_models/local_embedding_model.py +18 -14
  53. deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
  54. deepeval/models/embedding_models/openai_embedding_model.py +40 -21
  55. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  56. deepeval/models/llms/anthropic_model.py +44 -23
  57. deepeval/models/llms/azure_model.py +121 -36
  58. deepeval/models/llms/deepseek_model.py +18 -13
  59. deepeval/models/llms/gemini_model.py +129 -43
  60. deepeval/models/llms/grok_model.py +18 -13
  61. deepeval/models/llms/kimi_model.py +18 -13
  62. deepeval/models/llms/litellm_model.py +42 -22
  63. deepeval/models/llms/local_model.py +12 -7
  64. deepeval/models/llms/ollama_model.py +114 -12
  65. deepeval/models/llms/openai_model.py +137 -41
  66. deepeval/models/llms/portkey_model.py +24 -7
  67. deepeval/models/llms/utils.py +5 -3
  68. deepeval/models/retry_policy.py +17 -14
  69. deepeval/models/utils.py +46 -1
  70. deepeval/optimizer/__init__.py +5 -0
  71. deepeval/optimizer/algorithms/__init__.py +6 -0
  72. deepeval/optimizer/algorithms/base.py +29 -0
  73. deepeval/optimizer/algorithms/configs.py +18 -0
  74. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  75. deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
  76. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  77. deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
  78. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  79. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  80. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  81. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  82. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  83. deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
  84. deepeval/{optimization → optimizer}/configs.py +5 -8
  85. deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
  86. deepeval/optimizer/prompt_optimizer.py +263 -0
  87. deepeval/optimizer/rewriter/__init__.py +5 -0
  88. deepeval/optimizer/rewriter/rewriter.py +124 -0
  89. deepeval/optimizer/rewriter/utils.py +214 -0
  90. deepeval/optimizer/scorer/__init__.py +5 -0
  91. deepeval/optimizer/scorer/base.py +86 -0
  92. deepeval/optimizer/scorer/scorer.py +316 -0
  93. deepeval/optimizer/scorer/utils.py +30 -0
  94. deepeval/optimizer/types.py +148 -0
  95. deepeval/{optimization → optimizer}/utils.py +47 -165
  96. deepeval/prompt/prompt.py +5 -9
  97. deepeval/test_case/__init__.py +1 -3
  98. deepeval/test_case/api.py +12 -10
  99. deepeval/test_case/conversational_test_case.py +19 -1
  100. deepeval/test_case/llm_test_case.py +152 -1
  101. deepeval/test_case/utils.py +4 -8
  102. deepeval/test_run/api.py +15 -14
  103. deepeval/test_run/test_run.py +3 -3
  104. deepeval/tracing/patchers.py +9 -4
  105. deepeval/tracing/tracing.py +2 -2
  106. deepeval/utils.py +65 -0
  107. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  108. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/RECORD +116 -125
  109. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  110. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  111. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  112. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  113. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  114. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  115. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  116. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  117. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  118. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  119. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  120. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  121. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  122. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  123. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  124. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  125. deepeval/models/mlllms/__init__.py +0 -4
  126. deepeval/models/mlllms/azure_model.py +0 -343
  127. deepeval/models/mlllms/gemini_model.py +0 -313
  128. deepeval/models/mlllms/ollama_model.py +0 -175
  129. deepeval/models/mlllms/openai_model.py +0 -309
  130. deepeval/optimization/__init__.py +0 -13
  131. deepeval/optimization/adapters/__init__.py +0 -2
  132. deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  133. deepeval/optimization/aggregates.py +0 -14
  134. deepeval/optimization/copro/configs.py +0 -31
  135. deepeval/optimization/gepa/__init__.py +0 -7
  136. deepeval/optimization/gepa/configs.py +0 -115
  137. deepeval/optimization/miprov2/configs.py +0 -134
  138. deepeval/optimization/miprov2/loop.py +0 -785
  139. deepeval/optimization/mutations/__init__.py +0 -0
  140. deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  141. deepeval/optimization/policies/__init__.py +0 -16
  142. deepeval/optimization/policies/tie_breaker.py +0 -67
  143. deepeval/optimization/prompt_optimizer.py +0 -462
  144. deepeval/optimization/simba/__init__.py +0 -0
  145. deepeval/optimization/simba/configs.py +0 -33
  146. deepeval/optimization/types.py +0 -361
  147. deepeval/test_case/mllm_test_case.py +0 -170
  148. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  149. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  150. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  152. /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
  153. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  154. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  155. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
deepeval/metrics/utils.py CHANGED
@@ -2,16 +2,14 @@ import inspect
2
2
  import json
3
3
  import re
4
4
  import sys
5
- import itertools
6
5
  from typing import Any, Dict, Optional, List, Union, Tuple
7
6
 
8
7
  from deepeval.errors import (
9
8
  MissingTestCaseParamsError,
10
- MismatchedTestCaseInputsError,
11
9
  )
10
+ from deepeval.utils import convert_to_multi_modal_array
12
11
  from deepeval.models import (
13
12
  DeepEvalBaseLLM,
14
- DeepEvalBaseMLLM,
15
13
  GPTModel,
16
14
  AnthropicModel,
17
15
  AzureOpenAIModel,
@@ -22,10 +20,6 @@ from deepeval.models import (
22
20
  OllamaEmbeddingModel,
23
21
  LocalEmbeddingModel,
24
22
  GeminiModel,
25
- MultimodalOpenAIModel,
26
- MultimodalGeminiModel,
27
- MultimodalOllamaModel,
28
- MultimodalAzureOpenAIMLLMModel,
29
23
  AmazonBedrockModel,
30
24
  LiteLLMModel,
31
25
  KimiModel,
@@ -45,11 +39,8 @@ from deepeval.metrics import (
45
39
  )
46
40
  from deepeval.models.base_model import DeepEvalBaseEmbeddingModel
47
41
  from deepeval.test_case import (
48
- Turn,
49
42
  LLMTestCase,
50
43
  LLMTestCaseParams,
51
- MLLMTestCase,
52
- MLLMTestCaseParams,
53
44
  ConversationalTestCase,
54
45
  MLLMImage,
55
46
  Turn,
@@ -58,6 +49,13 @@ from deepeval.test_case import (
58
49
  TurnParams,
59
50
  )
60
51
 
52
+ MULTIMODAL_SUPPORTED_MODELS = [
53
+ GPTModel,
54
+ GeminiModel,
55
+ OllamaModel,
56
+ AzureOpenAIModel,
57
+ ]
58
+
61
59
 
62
60
  def copy_metrics(
63
61
  metrics: List[
@@ -201,7 +199,20 @@ def check_conversational_test_case_params(
201
199
  test_case_params: List[TurnParams],
202
200
  metric: BaseConversationalMetric,
203
201
  require_chatbot_role: bool = False,
202
+ model: Optional[DeepEvalBaseLLM] = None,
203
+ multimodal: Optional[bool] = False,
204
204
  ):
205
+ if multimodal:
206
+ if not model or not model.supports_multimodal():
207
+ if model and type(model) in MULTIMODAL_SUPPORTED_MODELS:
208
+ raise ValueError(
209
+ f"The evaluation model {model.name} does not support multimodal evaluations at the moment. Available multi-modal models for the {model.__class__.__name__} provider includes {', '.join(model.__class__.valid_multimodal_models)}."
210
+ )
211
+ else:
212
+ raise ValueError(
213
+ f"The evaluation model {model.name} does not support multimodal inputs, please use one of the following evaluation models: {', '.join([cls.__name__ for cls in MULTIMODAL_SUPPORTED_MODELS])}"
214
+ )
215
+
205
216
  if isinstance(test_case, ConversationalTestCase) is False:
206
217
  error_str = f"Unable to evaluate test cases that are not of type 'ConversationalTestCase' using the conversational '{metric.__name__}' metric."
207
218
  metric.error = error_str
@@ -289,15 +300,26 @@ def check_arena_test_case_params(
289
300
 
290
301
 
291
302
  def check_mllm_test_case_params(
292
- test_case: MLLMTestCase,
293
- test_case_params: List[MLLMTestCaseParams],
303
+ test_case: LLMTestCase,
304
+ test_case_params: List[LLMTestCaseParams],
294
305
  input_image_count: Optional[int],
295
306
  actual_output_image_count: Optional[int],
296
307
  metric: BaseMetric,
308
+ model: Optional[DeepEvalBaseLLM] = None,
297
309
  ):
310
+ if not model or not model.supports_multimodal():
311
+ if model and type(model) in MULTIMODAL_SUPPORTED_MODELS:
312
+ raise ValueError(
313
+ f"The evaluation model {model.name} does not support multimodal evaluations at the moment. Available multi-modal models for the {model.__class__.__name__} provider includes {', '.join(model.__class__.valid_multimodal_models)}."
314
+ )
315
+ else:
316
+ raise ValueError(
317
+ f"The evaluation model {model.name} does not support multimodal inputs, please use one of the following evaluation models: {', '.join([cls.__name__ for cls in MULTIMODAL_SUPPORTED_MODELS])}"
318
+ )
319
+
298
320
  if input_image_count:
299
321
  count = 0
300
- for ele in test_case.input:
322
+ for ele in convert_to_multi_modal_array(test_case.input):
301
323
  if isinstance(ele, MLLMImage):
302
324
  count += 1
303
325
  if count != input_image_count:
@@ -306,18 +328,13 @@ def check_mllm_test_case_params(
306
328
 
307
329
  if actual_output_image_count:
308
330
  count = 0
309
- for ele in test_case.actual_output:
331
+ for ele in convert_to_multi_modal_array(test_case.actual_output):
310
332
  if isinstance(ele, MLLMImage):
311
333
  count += 1
312
334
  if count != actual_output_image_count:
313
335
  error_str = f"Unable to evaluate test cases with '{actual_output_image_count}' output images using the '{metric.__name__}' metric. `{count}` found."
314
336
  raise ValueError(error_str)
315
337
 
316
- if isinstance(test_case, MLLMTestCase) is False:
317
- error_str = f"Unable to evaluate test cases that are not of type 'MLLMTestCase' using the '{metric.__name__}' metric."
318
- metric.error = error_str
319
- raise ValueError(error_str)
320
-
321
338
  missing_params = []
322
339
  for param in test_case_params:
323
340
  if getattr(test_case, param.value) is None:
@@ -339,8 +356,8 @@ def check_mllm_test_case_params(
339
356
 
340
357
 
341
358
  def check_mllm_test_cases_params(
342
- test_cases: List[MLLMTestCase],
343
- test_case_params: List[MLLMTestCaseParams],
359
+ test_cases: List[LLMTestCase],
360
+ test_case_params: List[LLMTestCaseParams],
344
361
  input_image_count: Optional[int],
345
362
  actual_output_image_count: Optional[int],
346
363
  metric: BaseMetric,
@@ -459,7 +476,7 @@ def initialize_model(
459
476
  elif should_use_local_model():
460
477
  return LocalModel(), True
461
478
  elif should_use_azure_openai():
462
- return AzureOpenAIModel(model_name=model), True
479
+ return AzureOpenAIModel(model=model), True
463
480
  elif should_use_moonshot_model():
464
481
  return KimiModel(model=model), True
465
482
  elif should_use_grok_model():
@@ -501,42 +518,6 @@ def is_native_model(
501
518
  ###############################################
502
519
 
503
520
 
504
- def initialize_multimodal_model(
505
- model: Optional[Union[str, DeepEvalBaseMLLM]] = None,
506
- ) -> Tuple[DeepEvalBaseLLM, bool]:
507
- """
508
- Returns a tuple of (initialized DeepEvalBaseMLLM, using_native_model boolean)
509
- """
510
- if is_native_mllm(model):
511
- return model, True
512
- if isinstance(model, DeepEvalBaseMLLM):
513
- return model, False
514
- if should_use_gemini_model():
515
- return MultimodalGeminiModel(), True
516
- if should_use_ollama_model():
517
- return MultimodalOllamaModel(), True
518
- elif should_use_azure_openai():
519
- return MultimodalAzureOpenAIMLLMModel(model_name=model), True
520
- elif isinstance(model, str) or model is None:
521
- return MultimodalOpenAIModel(model=model), True
522
- raise TypeError(
523
- f"Unsupported type for model: {type(model)}. Expected None, str, DeepEvalBaseMLLM, MultimodalOpenAIModel, MultimodalOllamaModel."
524
- )
525
-
526
-
527
- def is_native_mllm(
528
- model: Optional[Union[str, DeepEvalBaseLLM]] = None,
529
- ) -> bool:
530
- if (
531
- isinstance(model, MultimodalOpenAIModel)
532
- or isinstance(model, MultimodalOllamaModel)
533
- or isinstance(model, MultimodalGeminiModel)
534
- ):
535
- return True
536
- else:
537
- return False
538
-
539
-
540
521
  ###############################################
541
522
  # Embedding Model
542
523
  ###############################################
deepeval/models/__init__.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from deepeval.models.base_model import (
2
2
  DeepEvalBaseModel,
3
3
  DeepEvalBaseLLM,
4
- DeepEvalBaseMLLM,
5
4
  DeepEvalBaseEmbeddingModel,
6
5
  )
7
6
  from deepeval.models.llms import (
@@ -17,12 +16,6 @@ from deepeval.models.llms import (
17
16
  GrokModel,
18
17
  DeepSeekModel,
19
18
  )
20
- from deepeval.models.mlllms import (
21
- MultimodalOpenAIModel,
22
- MultimodalOllamaModel,
23
- MultimodalGeminiModel,
24
- MultimodalAzureOpenAIMLLMModel,
25
- )
26
19
  from deepeval.models.embedding_models import (
27
20
  OpenAIEmbeddingModel,
28
21
  AzureOpenAIEmbeddingModel,
@@ -33,7 +26,6 @@ from deepeval.models.embedding_models import (
33
26
  __all__ = [
34
27
  "DeepEvalBaseModel",
35
28
  "DeepEvalBaseLLM",
36
- "DeepEvalBaseMLLM",
37
29
  "DeepEvalBaseEmbeddingModel",
38
30
  "GPTModel",
39
31
  "AzureOpenAIModel",
@@ -46,10 +38,6 @@ __all__ = [
46
38
  "KimiModel",
47
39
  "GrokModel",
48
40
  "DeepSeekModel",
49
- "MultimodalOpenAIModel",
50
- "MultimodalOllamaModel",
51
- "MultimodalGeminiModel",
52
- "MultimodalAzureOpenAIMLLMModel",
53
41
  "OpenAIEmbeddingModel",
54
42
  "AzureOpenAIEmbeddingModel",
55
43
  "LocalEmbeddingModel",
deepeval/models/base_model.py CHANGED
@@ -31,9 +31,9 @@ class DeepEvalBaseModel(ABC):
31
31
 
32
32
 
33
33
  class DeepEvalBaseLLM(ABC):
34
- def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
35
- self.model_name = parse_model_name(model_name)
36
- self.model = self.load_model(*args, **kwargs)
34
+ def __init__(self, model: Optional[str] = None, *args, **kwargs):
35
+ self.name = parse_model_name(model)
36
+ self.model = self.load_model()
37
37
 
38
38
  @abstractmethod
39
39
  def load_model(self, *args, **kwargs) -> "DeepEvalBaseLLM":
@@ -62,6 +62,13 @@ class DeepEvalBaseLLM(ABC):
62
62
  """
63
63
  pass
64
64
 
65
+ @abstractmethod
66
+ def get_model_name(self, *args, **kwargs) -> str:
67
+ return self.name
68
+
69
+ def supports_multimodal(self) -> bool:
70
+ return False
71
+
65
72
  def batch_generate(self, *args, **kwargs) -> List[str]:
66
73
  """Runs the model to output LLM responses.
67
74
 
@@ -72,43 +79,14 @@ class DeepEvalBaseLLM(ABC):
72
79
  "batch_generate is not implemented for this model"
73
80
  )
74
81
 
75
- @abstractmethod
76
- def get_model_name(self, *args, **kwargs) -> str:
77
- pass
78
-
79
-
80
- class DeepEvalBaseMLLM(ABC):
81
- def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
82
- self.model_name = parse_model_name(model_name)
83
-
84
- @abstractmethod
85
- def generate(self, *args, **kwargs) -> str:
86
- """Runs the model to output MLLM response.
87
-
88
- Returns:
89
- A string.
90
- """
91
- pass
92
-
93
- @abstractmethod
94
- async def a_generate(self, *args, **kwargs) -> str:
95
- """Runs the model to output MLLM response.
96
-
97
- Returns:
98
- A string.
99
- """
100
- pass
101
-
102
- @abstractmethod
103
- def get_model_name(self, *args, **kwargs) -> str:
104
- pass
82
+ def supports_multimodal(self):
83
+ return False
105
84
 
106
85
 
107
86
  class DeepEvalBaseEmbeddingModel(ABC):
108
- def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
109
- self.model_name = parse_model_name(model_name)
110
-
111
- self.model = self.load_model(*args, **kwargs)
87
+ def __init__(self, model: Optional[str] = None, *args, **kwargs):
88
+ self.name = parse_model_name(model)
89
+ self.model = self.load_model()
112
90
 
113
91
  @abstractmethod
114
92
  def load_model(self, *args, **kwargs) -> "DeepEvalBaseEmbeddingModel":
@@ -157,4 +135,4 @@ class DeepEvalBaseEmbeddingModel(ABC):
157
135
 
158
136
  @abstractmethod
159
137
  def get_model_name(self, *args, **kwargs) -> str:
160
- pass
138
+ return self.name
deepeval/models/embedding_models/__init__.py CHANGED
@@ -2,3 +2,10 @@ from .azure_embedding_model import AzureOpenAIEmbeddingModel
2
2
  from .openai_embedding_model import OpenAIEmbeddingModel
3
3
  from .local_embedding_model import LocalEmbeddingModel
4
4
  from .ollama_embedding_model import OllamaEmbeddingModel
5
+
6
+ __all__ = [
7
+ "AzureOpenAIEmbeddingModel",
8
+ "OpenAIEmbeddingModel",
9
+ "LocalEmbeddingModel",
10
+ "OllamaEmbeddingModel",
11
+ ]
deepeval/models/embedding_models/azure_embedding_model.py CHANGED
@@ -9,53 +9,77 @@ from deepeval.models.retry_policy import (
9
9
  sdk_retries_for,
10
10
  )
11
11
  from deepeval.constants import ProviderSlug as PS
12
- from deepeval.models.utils import require_secret_api_key
12
+ from deepeval.models.utils import (
13
+ require_secret_api_key,
14
+ normalize_kwargs_and_extract_aliases,
15
+ )
13
16
 
14
17
 
15
18
  retry_azure = create_retry_decorator(PS.AZURE)
16
19
 
20
+ _ALIAS_MAP = {
21
+ "api_key": ["openai_api_key"],
22
+ "base_url": ["azure_endpoint"],
23
+ "deployment_name": ["azure_deployment"],
24
+ }
25
+
17
26
 
18
27
  class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
19
28
  def __init__(
20
29
  self,
21
- openai_api_key: Optional[str] = None,
22
- openai_api_version: Optional[str] = None,
23
- azure_endpoint: Optional[str] = None,
24
- azure_deployment: Optional[str] = None,
25
30
  model: Optional[str] = None,
31
+ api_key: Optional[str] = None,
32
+ base_url: Optional[str] = None,
33
+ deployment_name: Optional[str] = None,
34
+ openai_api_version: Optional[str] = None,
26
35
  generation_kwargs: Optional[Dict] = None,
27
- **client_kwargs,
36
+ **kwargs,
28
37
  ):
38
+ normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
39
+ "AzureOpenAIEmbeddingModel",
40
+ kwargs,
41
+ _ALIAS_MAP,
42
+ )
43
+
44
+ # re-map depricated keywords to re-named positional args
45
+ if api_key is None and "api_key" in alias_values:
46
+ api_key = alias_values["api_key"]
47
+ if base_url is None and "base_url" in alias_values:
48
+ base_url = alias_values["base_url"]
49
+ if deployment_name is None and "deployment_name" in alias_values:
50
+ deployment_name = alias_values["deployment_name"]
51
+
29
52
  settings = get_settings()
30
53
 
31
- if openai_api_key is not None:
54
+ if api_key is not None:
32
55
  # keep it secret, keep it safe from serializings, logging and alike
33
- self.openai_api_key: SecretStr | None = SecretStr(openai_api_key)
56
+ self.api_key: SecretStr | None = SecretStr(api_key)
34
57
  else:
35
- self.openai_api_key = settings.AZURE_OPENAI_API_KEY
58
+ self.api_key = settings.AZURE_OPENAI_API_KEY
36
59
 
37
60
  self.openai_api_version = (
38
61
  openai_api_version or settings.OPENAI_API_VERSION
39
62
  )
40
- self.azure_endpoint = (
41
- azure_endpoint
63
+ self.base_url = (
64
+ base_url
42
65
  or settings.AZURE_OPENAI_ENDPOINT
43
66
  and str(settings.AZURE_OPENAI_ENDPOINT)
44
67
  )
45
68
 
46
- self.azure_deployment = (
47
- azure_deployment or settings.AZURE_EMBEDDING_DEPLOYMENT_NAME
69
+ self.deployment_name = (
70
+ deployment_name or settings.AZURE_EMBEDDING_DEPLOYMENT_NAME
48
71
  )
49
- self.client_kwargs = client_kwargs or {}
50
- self.model_name = model or self.azure_deployment
72
+ # Keep sanitized kwargs for client call to strip legacy keys
73
+ self.kwargs = normalized_kwargs
74
+ model = model or self.deployment_name
51
75
  self.generation_kwargs = generation_kwargs or {}
52
- super().__init__(self.model_name)
76
+ super().__init__(model)
53
77
 
54
78
  @retry_azure
55
79
  def embed_text(self, text: str) -> List[float]:
56
80
  client = self.load_model(async_mode=False)
57
81
  response = client.embeddings.create(
58
- input=text, model=self.model_name, **self.generation_kwargs
82
+ input=text, model=self.name, **self.generation_kwargs
59
83
  )
60
84
  return response.data[0].embedding
61
85
 
@@ -63,7 +87,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
63
87
  def embed_texts(self, texts: List[str]) -> List[List[float]]:
64
88
  client = self.load_model(async_mode=False)
65
89
  response = client.embeddings.create(
66
- input=texts, model=self.model_name, **self.generation_kwargs
90
+ input=texts, model=self.name, **self.generation_kwargs
67
91
  )
68
92
  return [item.embedding for item in response.data]
69
93
 
@@ -71,7 +95,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
71
95
  async def a_embed_text(self, text: str) -> List[float]:
72
96
  client = self.load_model(async_mode=True)
73
97
  response = await client.embeddings.create(
74
- input=text, model=self.model_name, **self.generation_kwargs
98
+ input=text, model=self.name, **self.generation_kwargs
75
99
  )
76
100
  return response.data[0].embedding
77
101
 
@@ -79,13 +103,10 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
79
103
  async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
80
104
  client = self.load_model(async_mode=True)
81
105
  response = await client.embeddings.create(
82
- input=texts, model=self.model_name, **self.generation_kwargs
106
+ input=texts, model=self.name, **self.generation_kwargs
83
107
  )
84
108
  return [item.embedding for item in response.data]
85
109
 
86
- def get_model_name(self) -> str:
87
- return self.model_name
88
-
89
110
  def load_model(self, async_mode: bool = False):
90
111
  if not async_mode:
91
112
  return self._build_client(AzureOpenAI)
@@ -93,21 +114,21 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
93
114
 
94
115
  def _build_client(self, cls):
95
116
  api_key = require_secret_api_key(
96
- self.openai_api_key,
117
+ self.api_key,
97
118
  provider_label="AzureOpenAI",
98
119
  env_var_name="AZURE_OPENAI_API_KEY",
99
- param_hint="`openai_api_key` to AzureOpenAIEmbeddingModel(...)",
120
+ param_hint="`api_key` to AzureOpenAIEmbeddingModel(...)",
100
121
  )
101
122
 
102
- client_kwargs = self.client_kwargs.copy()
123
+ client_kwargs = self.kwargs.copy()
103
124
  if not sdk_retries_for(PS.AZURE):
104
125
  client_kwargs["max_retries"] = 0
105
126
 
106
127
  client_init_kwargs = dict(
107
128
  api_key=api_key,
108
129
  api_version=self.openai_api_version,
109
- azure_endpoint=self.azure_endpoint,
110
- azure_deployment=self.azure_deployment,
130
+ azure_endpoint=self.base_url,
131
+ azure_deployment=self.deployment_name,
111
132
  **client_kwargs,
112
133
  )
113
134
  try:
@@ -118,3 +139,6 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
118
139
  client_init_kwargs.pop("max_retries", None)
119
140
  return cls(**client_init_kwargs)
120
141
  raise
142
+
143
+ def get_model_name(self):
144
+ return f"{self.name} (Azure)"
deepeval/models/embedding_models/local_embedding_model.py CHANGED
@@ -3,7 +3,9 @@ from typing import Dict, List, Optional
3
3
  from pydantic import SecretStr
4
4
 
5
5
  from deepeval.config.settings import get_settings
6
- from deepeval.models.utils import require_secret_api_key
6
+ from deepeval.models.utils import (
7
+ require_secret_api_key,
8
+ )
7
9
  from deepeval.models import DeepEvalBaseEmbeddingModel
8
10
  from deepeval.models.retry_policy import (
9
11
  create_retry_decorator,
@@ -19,12 +21,13 @@ retry_local = create_retry_decorator(PS.LOCAL)
19
21
  class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
20
22
  def __init__(
21
23
  self,
24
+ model: Optional[str] = None,
22
25
  api_key: Optional[str] = None,
23
26
  base_url: Optional[str] = None,
24
- model: Optional[str] = None,
25
27
  generation_kwargs: Optional[Dict] = None,
26
- **client_kwargs,
28
+ **kwargs,
27
29
  ):
30
+
28
31
  settings = get_settings()
29
32
  if api_key is not None:
30
33
  # keep it secret, keep it safe from serializings, logging and alike
@@ -37,16 +40,17 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
37
40
  or settings.LOCAL_EMBEDDING_BASE_URL
38
41
  and str(settings.LOCAL_EMBEDDING_BASE_URL)
39
42
  )
40
- self.model_name = model or settings.LOCAL_EMBEDDING_MODEL_NAME
41
- self.client_kwargs = client_kwargs or {}
43
+ model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
44
+ # Keep sanitized kwargs for client call to strip legacy keys
45
+ self.kwargs = kwargs
42
46
  self.generation_kwargs = generation_kwargs or {}
43
- super().__init__(self.model_name)
47
+ super().__init__(model)
44
48
 
45
49
  @retry_local
46
50
  def embed_text(self, text: str) -> List[float]:
47
51
  embedding_model = self.load_model()
48
52
  response = embedding_model.embeddings.create(
49
- model=self.model_name, input=[text], **self.generation_kwargs
53
+ model=self.name, input=[text], **self.generation_kwargs
50
54
  )
51
55
  return response.data[0].embedding
52
56
 
@@ -54,7 +58,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
54
58
  def embed_texts(self, texts: List[str]) -> List[List[float]]:
55
59
  embedding_model = self.load_model()
56
60
  response = embedding_model.embeddings.create(
57
- model=self.model_name, input=texts, **self.generation_kwargs
61
+ model=self.name, input=texts, **self.generation_kwargs
58
62
  )
59
63
  return [data.embedding for data in response.data]
60
64
 
@@ -62,7 +66,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
62
66
  async def a_embed_text(self, text: str) -> List[float]:
63
67
  embedding_model = self.load_model(async_mode=True)
64
68
  response = await embedding_model.embeddings.create(
65
- model=self.model_name, input=[text], **self.generation_kwargs
69
+ model=self.name, input=[text], **self.generation_kwargs
66
70
  )
67
71
  return response.data[0].embedding
68
72
 
@@ -70,7 +74,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
70
74
  async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
71
75
  embedding_model = self.load_model(async_mode=True)
72
76
  response = await embedding_model.embeddings.create(
73
- model=self.model_name, input=texts, **self.generation_kwargs
77
+ model=self.name, input=texts, **self.generation_kwargs
74
78
  )
75
79
  return [data.embedding for data in response.data]
76
80
 
@@ -78,9 +82,6 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
78
82
  # Model
79
83
  ###############################################
80
84
 
81
- def get_model_name(self):
82
- return self.model_name
83
-
84
85
  def load_model(self, async_mode: bool = False):
85
86
  if not async_mode:
86
87
  return self._build_client(OpenAI)
@@ -94,7 +95,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
94
95
  param_hint="`api_key` to LocalEmbeddingModel(...)",
95
96
  )
96
97
 
97
- client_kwargs = self.client_kwargs.copy()
98
+ client_kwargs = self.kwargs.copy()
98
99
  if not sdk_retries_for(PS.LOCAL):
99
100
  client_kwargs["max_retries"] = 0
100
101
 
@@ -111,3 +112,6 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
111
112
  client_init_kwargs.pop("max_retries", None)
112
113
  return cls(**client_init_kwargs)
113
114
  raise
115
+
116
+ def get_model_name(self):
117
+ return f"{self.name} (Local Model)"