deepeval 3.7.4__py3-none-any.whl → 3.7.6__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in the supported public registries, and is provided for informational purposes only.
Files changed (224)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +35 -1
  3. deepeval/dataset/api.py +23 -1
  4. deepeval/dataset/golden.py +139 -2
  5. deepeval/evaluate/evaluate.py +16 -11
  6. deepeval/evaluate/execute.py +13 -181
  7. deepeval/evaluate/utils.py +6 -26
  8. deepeval/integrations/pydantic_ai/agent.py +19 -2
  9. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  10. deepeval/key_handler.py +3 -0
  11. deepeval/metrics/__init__.py +14 -16
  12. deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
  13. deepeval/metrics/answer_relevancy/template.py +22 -3
  14. deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  15. deepeval/metrics/arena_g_eval/template.py +17 -1
  16. deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  17. deepeval/metrics/argument_correctness/template.py +19 -2
  18. deepeval/metrics/base_metric.py +13 -44
  19. deepeval/metrics/bias/bias.py +102 -108
  20. deepeval/metrics/bias/template.py +14 -2
  21. deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
  22. deepeval/metrics/contextual_precision/template.py +115 -66
  23. deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
  24. deepeval/metrics/contextual_recall/template.py +106 -55
  25. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
  26. deepeval/metrics/contextual_relevancy/template.py +87 -58
  27. deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  28. deepeval/metrics/conversation_completeness/template.py +23 -3
  29. deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  30. deepeval/metrics/conversational_dag/nodes.py +66 -123
  31. deepeval/metrics/conversational_dag/templates.py +16 -0
  32. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  33. deepeval/metrics/dag/dag.py +10 -0
  34. deepeval/metrics/dag/nodes.py +63 -126
  35. deepeval/metrics/dag/templates.py +16 -2
  36. deepeval/metrics/exact_match/exact_match.py +9 -1
  37. deepeval/metrics/faithfulness/faithfulness.py +138 -149
  38. deepeval/metrics/faithfulness/schema.py +1 -1
  39. deepeval/metrics/faithfulness/template.py +200 -115
  40. deepeval/metrics/g_eval/g_eval.py +87 -78
  41. deepeval/metrics/g_eval/template.py +18 -1
  42. deepeval/metrics/g_eval/utils.py +7 -6
  43. deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  44. deepeval/metrics/goal_accuracy/template.py +21 -3
  45. deepeval/metrics/hallucination/hallucination.py +60 -75
  46. deepeval/metrics/hallucination/template.py +13 -0
  47. deepeval/metrics/indicator.py +7 -10
  48. deepeval/metrics/json_correctness/json_correctness.py +40 -38
  49. deepeval/metrics/json_correctness/template.py +10 -0
  50. deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  51. deepeval/metrics/knowledge_retention/schema.py +9 -3
  52. deepeval/metrics/knowledge_retention/template.py +12 -0
  53. deepeval/metrics/mcp/mcp_task_completion.py +68 -38
  54. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
  55. deepeval/metrics/mcp/template.py +52 -0
  56. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  57. deepeval/metrics/mcp_use_metric/template.py +12 -0
  58. deepeval/metrics/misuse/misuse.py +77 -97
  59. deepeval/metrics/misuse/template.py +15 -0
  60. deepeval/metrics/multimodal_metrics/__init__.py +0 -19
  61. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
  62. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
  63. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
  64. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
  65. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
  66. deepeval/metrics/non_advice/non_advice.py +79 -105
  67. deepeval/metrics/non_advice/template.py +12 -0
  68. deepeval/metrics/pattern_match/pattern_match.py +12 -4
  69. deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  70. deepeval/metrics/pii_leakage/template.py +14 -0
  71. deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  72. deepeval/metrics/plan_adherence/template.py +11 -0
  73. deepeval/metrics/plan_quality/plan_quality.py +63 -87
  74. deepeval/metrics/plan_quality/template.py +9 -0
  75. deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
  76. deepeval/metrics/prompt_alignment/template.py +12 -0
  77. deepeval/metrics/ragas.py +3 -3
  78. deepeval/metrics/role_adherence/role_adherence.py +48 -71
  79. deepeval/metrics/role_adherence/template.py +14 -0
  80. deepeval/metrics/role_violation/role_violation.py +75 -108
  81. deepeval/metrics/role_violation/template.py +12 -0
  82. deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  83. deepeval/metrics/step_efficiency/template.py +11 -0
  84. deepeval/metrics/summarization/summarization.py +115 -183
  85. deepeval/metrics/summarization/template.py +19 -0
  86. deepeval/metrics/task_completion/task_completion.py +67 -73
  87. deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
  88. deepeval/metrics/tool_use/tool_use.py +42 -66
  89. deepeval/metrics/topic_adherence/template.py +13 -0
  90. deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
  91. deepeval/metrics/toxicity/template.py +13 -0
  92. deepeval/metrics/toxicity/toxicity.py +80 -99
  93. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  94. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  95. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
  96. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  97. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  98. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
  99. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  100. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  101. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
  102. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  103. deepeval/metrics/turn_faithfulness/template.py +218 -0
  104. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
  105. deepeval/metrics/turn_relevancy/template.py +14 -0
  106. deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  107. deepeval/metrics/utils.py +158 -122
  108. deepeval/models/__init__.py +0 -12
  109. deepeval/models/base_model.py +49 -33
  110. deepeval/models/embedding_models/__init__.py +7 -0
  111. deepeval/models/embedding_models/azure_embedding_model.py +79 -33
  112. deepeval/models/embedding_models/local_embedding_model.py +39 -20
  113. deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
  114. deepeval/models/embedding_models/openai_embedding_model.py +42 -22
  115. deepeval/models/llms/amazon_bedrock_model.py +226 -72
  116. deepeval/models/llms/anthropic_model.py +178 -63
  117. deepeval/models/llms/azure_model.py +218 -60
  118. deepeval/models/llms/constants.py +2032 -0
  119. deepeval/models/llms/deepseek_model.py +95 -40
  120. deepeval/models/llms/gemini_model.py +209 -64
  121. deepeval/models/llms/grok_model.py +139 -68
  122. deepeval/models/llms/kimi_model.py +140 -90
  123. deepeval/models/llms/litellm_model.py +131 -37
  124. deepeval/models/llms/local_model.py +125 -21
  125. deepeval/models/llms/ollama_model.py +147 -24
  126. deepeval/models/llms/openai_model.py +222 -269
  127. deepeval/models/llms/portkey_model.py +81 -22
  128. deepeval/models/llms/utils.py +8 -3
  129. deepeval/models/retry_policy.py +17 -14
  130. deepeval/models/utils.py +106 -5
  131. deepeval/optimizer/__init__.py +5 -0
  132. deepeval/optimizer/algorithms/__init__.py +6 -0
  133. deepeval/optimizer/algorithms/base.py +29 -0
  134. deepeval/optimizer/algorithms/configs.py +18 -0
  135. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  136. deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
  137. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  138. deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
  139. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  140. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  141. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  142. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  143. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  144. deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
  145. deepeval/{optimization → optimizer}/configs.py +5 -8
  146. deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
  147. deepeval/optimizer/prompt_optimizer.py +263 -0
  148. deepeval/optimizer/rewriter/__init__.py +5 -0
  149. deepeval/optimizer/rewriter/rewriter.py +124 -0
  150. deepeval/optimizer/rewriter/utils.py +214 -0
  151. deepeval/optimizer/scorer/__init__.py +5 -0
  152. deepeval/optimizer/scorer/base.py +86 -0
  153. deepeval/optimizer/scorer/scorer.py +316 -0
  154. deepeval/optimizer/scorer/utils.py +30 -0
  155. deepeval/optimizer/types.py +148 -0
  156. deepeval/{optimization → optimizer}/utils.py +47 -165
  157. deepeval/prompt/prompt.py +5 -9
  158. deepeval/simulator/conversation_simulator.py +43 -0
  159. deepeval/simulator/template.py +13 -0
  160. deepeval/test_case/__init__.py +1 -3
  161. deepeval/test_case/api.py +26 -45
  162. deepeval/test_case/arena_test_case.py +7 -2
  163. deepeval/test_case/conversational_test_case.py +68 -1
  164. deepeval/test_case/llm_test_case.py +206 -1
  165. deepeval/test_case/utils.py +4 -8
  166. deepeval/test_run/api.py +18 -14
  167. deepeval/test_run/test_run.py +3 -3
  168. deepeval/tracing/patchers.py +9 -4
  169. deepeval/tracing/tracing.py +2 -2
  170. deepeval/utils.py +65 -0
  171. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
  172. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
  173. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  174. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  175. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  176. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  177. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  178. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  179. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  180. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  181. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  182. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  183. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  184. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  185. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  186. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  187. deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  188. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  189. deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  190. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
  191. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  192. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  193. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  194. deepeval/models/mlllms/__init__.py +0 -4
  195. deepeval/models/mlllms/azure_model.py +0 -343
  196. deepeval/models/mlllms/gemini_model.py +0 -313
  197. deepeval/models/mlllms/ollama_model.py +0 -175
  198. deepeval/models/mlllms/openai_model.py +0 -309
  199. deepeval/optimization/__init__.py +0 -13
  200. deepeval/optimization/adapters/__init__.py +0 -2
  201. deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  202. deepeval/optimization/aggregates.py +0 -14
  203. deepeval/optimization/copro/configs.py +0 -31
  204. deepeval/optimization/gepa/__init__.py +0 -7
  205. deepeval/optimization/gepa/configs.py +0 -115
  206. deepeval/optimization/miprov2/configs.py +0 -134
  207. deepeval/optimization/miprov2/loop.py +0 -785
  208. deepeval/optimization/mutations/__init__.py +0 -0
  209. deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  210. deepeval/optimization/policies/__init__.py +0 -16
  211. deepeval/optimization/policies/tie_breaker.py +0 -67
  212. deepeval/optimization/prompt_optimizer.py +0 -462
  213. deepeval/optimization/simba/__init__.py +0 -0
  214. deepeval/optimization/simba/configs.py +0 -33
  215. deepeval/optimization/types.py +0 -361
  216. deepeval/test_case/mllm_test_case.py +0 -170
  217. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  218. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  219. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  220. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  221. /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
  222. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
  223. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
  224. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
deepeval/models/embedding_models/openai_embedding_model.py

@@ -2,8 +2,12 @@ from typing import Dict, Optional, List
 from openai import OpenAI, AsyncOpenAI
 from pydantic import SecretStr
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
-from deepeval.models.utils import require_secret_api_key
+from deepeval.models.utils import (
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.models import DeepEvalBaseEmbeddingModel
 from deepeval.models.retry_policy import (
     create_retry_decorator,
@@ -19,37 +23,53 @@ valid_openai_embedding_models = [
     "text-embedding-3-large",
     "text-embedding-ada-002",
 ]
+
 default_openai_embedding_model = "text-embedding-3-small"
 
+_ALIAS_MAP = {
+    "api_key": ["openai_api_key"],
+}
+
 
 class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
 
     def __init__(
         self,
         model: Optional[str] = None,
-        openai_api_key: Optional[str] = None,
+        api_key: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
-        **client_kwargs,
+        **kwargs,
    ):
-        if openai_api_key is not None:
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "OpenAIEmbeddingModel",
+            kwargs,
+            _ALIAS_MAP,
+        )
+
+        # re-map depricated keywords to re-named positional args
+        if api_key is None and "api_key" in alias_values:
+            api_key = alias_values["api_key"]
+
+        if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.openai_api_key: SecretStr | None = SecretStr(openai_api_key)
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
-            self.openai_api_key = get_settings().OPENAI_API_KEY
+            self.api_key = get_settings().OPENAI_API_KEY
 
-        self.model_name = model if model else default_openai_embedding_model
-        if self.model_name not in valid_openai_embedding_models:
-            raise ValueError(
+        model = model if model else default_openai_embedding_model
+        if model not in valid_openai_embedding_models:
+            raise DeepEvalError(
                 f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
             )
-        self.client_kwargs = client_kwargs or {}
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
+        super().__init__(model)
 
     @retry_openai
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=text, model=self.model_name, **self.generation_kwargs
+            input=text, model=self.name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -57,7 +77,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=texts, model=self.model_name, **self.generation_kwargs
+            input=texts, model=self.name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -65,7 +85,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=text, model=self.model_name, **self.generation_kwargs
+            input=text, model=self.name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -73,7 +93,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=texts, model=self.model_name, **self.generation_kwargs
+            input=texts, model=self.name, **self.generation_kwargs
        )
         return [item.embedding for item in response.data]
 
@@ -81,28 +101,25 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     # Model
     ###############################################
 
-    def get_model_name(self):
-        return self.model_name
-
     def load_model(self, async_mode: bool = False):
         if not async_mode:
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
     def _build_client(self, cls):
-        openai_api_key = require_secret_api_key(
-            self.openai_api_key,
+        api_key = require_secret_api_key(
+            self.api_key,
             provider_label="OpenAI",
             env_var_name="OPENAI_API_KEY",
-            param_hint="`openai_api_key` to OpenAIEmbeddingModel(...)",
+            param_hint="`api_key` to OpenAIEmbeddingModel(...)",
         )
 
-        client_kwargs = self.client_kwargs.copy()
+        client_kwargs = self.kwargs.copy()
         if not sdk_retries_for(PS.OPENAI):
             client_kwargs["max_retries"] = 0
 
         client_init_kwargs = dict(
-            api_key=openai_api_key,
+            api_key=api_key,
             **client_kwargs,
         )
         try:
@@ -113,3 +130,6 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
                 client_init_kwargs.pop("max_retries", None)
                 return cls(**client_init_kwargs)
             raise
+
+    def get_model_name(self):
+        return f"{self.name} (OpenAI)"
deepeval/models/llms/amazon_bedrock_model.py

@@ -1,131 +1,285 @@
-import asyncio
-
-from typing import Optional, Tuple, Union, Dict
+import base64
+from typing import Optional, Tuple, Union, Dict, List
 from contextlib import AsyncExitStack
-from pydantic import BaseModel
 
+from pydantic import BaseModel, SecretStr
+
+from deepeval.config.settings import get_settings
+from deepeval.utils import (
+    require_dependency,
+    require_param,
+)
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
+from deepeval.test_case import MLLMImage
+from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.models.llms.constants import BEDROCK_MODELS_DATA
 from deepeval.models.llms.utils import trim_and_load_json, safe_asyncio_run
 from deepeval.constants import ProviderSlug as PS
+from deepeval.models.utils import (
+    require_costs,
+    normalize_kwargs_and_extract_aliases,
+)
 
-# check aiobotocore availability
-try:
-    from aiobotocore.session import get_session
-    from botocore.config import Config
-
-    aiobotocore_available = True
-except ImportError:
-    aiobotocore_available = False
 
-# define retry policy
 retry_bedrock = create_retry_decorator(PS.BEDROCK)
 
-
-def _check_aiobotocore_available():
-    if not aiobotocore_available:
-        raise ImportError(
-            "aiobotocore and botocore are required for this functionality. "
-            "Install them via your package manager (e.g. pip install aiobotocore botocore)"
-        )
+_ALIAS_MAP = {
+    "model": ["model_id"],
+    "cost_per_input_token": ["input_token_cost"],
+    "cost_per_output_token": ["output_token_cost"],
+}
 
 
 class AmazonBedrockModel(DeepEvalBaseLLM):
     def __init__(
         self,
-        model_id: str,
-        region_name: str,
+        model: Optional[str] = None,
         aws_access_key_id: Optional[str] = None,
         aws_secret_access_key: Optional[str] = None,
-        input_token_cost: float = 0,
-        output_token_cost: float = 0,
+        cost_per_input_token: Optional[float] = None,
+        cost_per_output_token: Optional[float] = None,
+        region: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        _check_aiobotocore_available()
-        super().__init__(model_id)
-
-        self.model_id = model_id
-        self.region_name = region_name
-        self.aws_access_key_id = aws_access_key_id
-        self.aws_secret_access_key = aws_secret_access_key
-        self.input_token_cost = input_token_cost
-        self.output_token_cost = output_token_cost
-
-        # prepare aiobotocore session, config, and async exit stack
-        self._session = get_session()
+        settings = get_settings()
+
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "AmazonBedrockModel",
+            kwargs,
+            _ALIAS_MAP,
+        )
+
+        # Backwards compatibility for renamed params
+        if model is None and "model" in alias_values:
+            model = alias_values["model"]
+        if (
+            cost_per_input_token is None
+            and "cost_per_input_token" in alias_values
+        ):
+            cost_per_input_token = alias_values["cost_per_input_token"]
+        if (
+            cost_per_output_token is None
+            and "cost_per_output_token" in alias_values
+        ):
+            cost_per_output_token = alias_values["cost_per_output_token"]
+
+        # Secrets: prefer explicit args -> settings -> then AWS default chain
+        if aws_access_key_id is not None:
+            self.aws_access_key_id: Optional[SecretStr] = SecretStr(
+                aws_access_key_id
+            )
+        else:
+            self.aws_access_key_id = settings.AWS_ACCESS_KEY_ID
+
+        if aws_secret_access_key is not None:
+            self.aws_secret_access_key: Optional[SecretStr] = SecretStr(
+                aws_secret_access_key
+            )
+        else:
+            self.aws_secret_access_key = settings.AWS_SECRET_ACCESS_KEY
+
+        # Dependencies: aiobotocore & botocore
+        aiobotocore_session = require_dependency(
+            "aiobotocore.session",
+            provider_label="AmazonBedrockModel",
+            install_hint="Install it with `pip install aiobotocore`.",
+        )
+        self.botocore_module = require_dependency(
+            "botocore",
+            provider_label="AmazonBedrockModel",
+            install_hint="Install it with `pip install botocore`.",
+        )
+        self._session = aiobotocore_session.get_session()
         self._exit_stack = AsyncExitStack()
-        self.kwargs = kwargs
+
+        # Defaults from settings
+        model = model or settings.AWS_BEDROCK_MODEL_NAME
+        region = region or settings.AWS_BEDROCK_REGION
+
+        cost_per_input_token = (
+            cost_per_input_token
+            if cost_per_input_token is not None
+            else settings.AWS_BEDROCK_COST_PER_INPUT_TOKEN
+        )
+        cost_per_output_token = (
+            cost_per_output_token
+            if cost_per_output_token is not None
+            else settings.AWS_BEDROCK_COST_PER_OUTPUT_TOKEN
+        )
+
+        # Required params
+        model = require_param(
+            model,
+            provider_label="AmazonBedrockModel",
+            env_var_name="AWS_BEDROCK_MODEL_NAME",
+            param_hint="model",
+        )
+        region = require_param(
+            region,
+            provider_label="AmazonBedrockModel",
+            env_var_name="AWS_BEDROCK_REGION",
+            param_hint="region",
+        )
+
+        self.model_data = BEDROCK_MODELS_DATA.get(model)
+        cost_per_input_token, cost_per_output_token = require_costs(
+            self.model_data,
+            model,
+            "AWS_BEDROCK_COST_PER_INPUT_TOKEN",
+            "AWS_BEDROCK_COST_PER_OUTPUT_TOKEN",
+            cost_per_input_token,
+            cost_per_output_token,
+        )
+
+        # Final attributes
+        self.region = region
+        self.cost_per_input_token = float(cost_per_input_token or 0.0)
+        self.cost_per_output_token = float(cost_per_output_token or 0.0)
+
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
         self._client = None
         self._sdk_retry_mode: Optional[bool] = None
 
+        super().__init__(model)
+
     ###############################################
     # Generate functions
     ###############################################
 
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, Dict], float]:
+    ) -> Tuple[Union[str, BaseModel], float]:
         return safe_asyncio_run(self.a_generate(prompt, schema))
 
     @retry_bedrock
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, Dict], float]:
-        try:
+    ) -> Tuple[Union[str, BaseModel], float]:
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            payload = self.generate_payload(prompt)
+        else:
             payload = self.get_converse_request_body(prompt)
-            client = await self._ensure_client()
-            response = await client.converse(
-                modelId=self.model_id,
-                messages=payload["messages"],
-                inferenceConfig=payload["inferenceConfig"],
-            )
-            message = response["output"]["message"]["content"][0]["text"]
-            cost = self.calculate_cost(
-                response["usage"]["inputTokens"],
-                response["usage"]["outputTokens"],
-            )
-            if schema is None:
-                return message, cost
-            else:
-                json_output = trim_and_load_json(message)
-                return schema.model_validate(json_output), cost
-        finally:
-            await self.close()
+
+        payload = self.get_converse_request_body(prompt)
+        client = await self._ensure_client()
+        response = await client.converse(
+            modelId=self.get_model_name(),
+            messages=payload["messages"],
+            inferenceConfig=payload["inferenceConfig"],
+        )
+        message = response["output"]["message"]["content"][0]["text"]
+        cost = self.calculate_cost(
+            response["usage"]["inputTokens"],
+            response["usage"]["outputTokens"],
+        )
+        if schema is None:
+            return message, cost
+        else:
+            json_output = trim_and_load_json(message)
+            return schema.model_validate(json_output), cost
+
+    def generate_payload(
+        self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
+    ):
+        multimodal_input = [] if multimodal_input is None else multimodal_input
+        content = []
+        for element in multimodal_input:
+            if isinstance(element, str):
+                content.append({"text": element})
+            elif isinstance(element, MLLMImage):
+                # Bedrock doesn't support external URLs - must convert everything to bytes
+                element.ensure_images_loaded()
+
+                image_format = (
+                    (element.mimeType or "image/jpeg").split("/")[-1].upper()
+                )
+                image_format = "JPEG" if image_format == "JPG" else image_format
+
+                try:
+                    image_raw_bytes = base64.b64decode(element.dataBase64)
+                except Exception:
+                    raise ValueError(
+                        f"Invalid base64 data in MLLMImage: {element._id}"
+                    )
+
+                content.append(
+                    {
+                        "image": {
+                            "format": image_format,
+                            "source": {"bytes": image_raw_bytes},
+                        }
+                    }
+                )
+
+        return {
+            "messages": [{"role": "user", "content": content}],
+            "inferenceConfig": {
+                **self.generation_kwargs,
+            },
+        }
+
+    #########################
+    # Capabilities #
+    #########################
+
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
+
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        return self.model_data.supports_json
 
     ###############################################
     # Client management
     ###############################################
 
     async def _ensure_client(self):
+
         use_sdk = sdk_retries_for(PS.BEDROCK)
 
         # only rebuild if client is missing or the sdk retry mode changes
         if self._client is None or self._sdk_retry_mode != use_sdk:
-            # Close any previous
-            if self._client is not None:
-                await self._exit_stack.aclose()
-                self._client = None
 
             # create retry config for botocore
             retries_config = {"max_attempts": (5 if use_sdk else 1)}
             if use_sdk:
                 retries_config["mode"] = "adaptive"
 
+            Config = self.botocore_module.config.Config
             config = Config(retries=retries_config)
 
-            cm = self._session.create_client(
-                "bedrock-runtime",
-                region_name=self.region_name,
-                aws_access_key_id=self.aws_access_key_id,
-                aws_secret_access_key=self.aws_secret_access_key,
-                config=config,
+            client_kwargs = {
+                "region_name": self.region,
+                "config": config,
                 **self.kwargs,
-            )
+            }
+
+            if self.aws_access_key_id is not None:
+                client_kwargs["aws_access_key_id"] = (
+                    self.aws_access_key_id.get_secret_value()
+                )
+            if self.aws_secret_access_key is not None:
+                client_kwargs["aws_secret_access_key"] = (
+                    self.aws_secret_access_key.get_secret_value()
+                )
+
+            cm = self._session.create_client("bedrock-runtime", **client_kwargs)
+
             self._client = await self._exit_stack.enter_async_context(cm)
             self._sdk_retry_mode = use_sdk
 
@@ -150,12 +304,12 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
 
     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
         return (
-            input_tokens * self.input_token_cost
-            + output_tokens * self.output_token_cost
+            input_tokens * self.cost_per_input_token
+            + output_tokens * self.cost_per_output_token
         )
 
     def load_model(self):
         pass
 
     def get_model_name(self) -> str:
-        return self.model_id
+        return self.name
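For AmazonBedrockModel, the constructor drops the required `model_id`/`region_name` positional arguments in favor of optional `model` and `region` keywords (with settings fallbacks such as AWS_BEDROCK_MODEL_NAME and AWS_BEDROCK_REGION), and renames the cost parameters to `cost_per_input_token`/`cost_per_output_token`; `model_id` and the old cost names stay usable through `_ALIAS_MAP`, while `region_name` is not aliased. A hedged sketch of a new-style call (the model id, region, and per-token costs below are illustrative, not taken from the diff):

    from deepeval.models.llms.amazon_bedrock_model import AmazonBedrockModel

    model = AmazonBedrockModel(
        model="anthropic.claude-3-haiku-20240307-v1:0",  # illustrative Bedrock model id
        region="us-east-1",
        cost_per_input_token=0.00000025,   # illustrative USD-per-token figures
        cost_per_output_token=0.00000125,
    )

    # generate() now returns Tuple[Union[str, BaseModel], float]
    text, cost = model.generate("Summarize the Converse API in one sentence.")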