deepeval 3.7.5 (py3-none-any.whl) → 3.7.6 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +35 -1
  3. deepeval/dataset/api.py +23 -1
  4. deepeval/dataset/golden.py +106 -21
  5. deepeval/evaluate/evaluate.py +0 -3
  6. deepeval/evaluate/execute.py +10 -222
  7. deepeval/evaluate/utils.py +6 -30
  8. deepeval/key_handler.py +3 -0
  9. deepeval/metrics/__init__.py +0 -4
  10. deepeval/metrics/answer_relevancy/answer_relevancy.py +89 -132
  11. deepeval/metrics/answer_relevancy/template.py +102 -179
  12. deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  13. deepeval/metrics/arena_g_eval/template.py +17 -1
  14. deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  15. deepeval/metrics/argument_correctness/template.py +19 -2
  16. deepeval/metrics/base_metric.py +13 -41
  17. deepeval/metrics/bias/bias.py +102 -108
  18. deepeval/metrics/bias/template.py +14 -2
  19. deepeval/metrics/contextual_precision/contextual_precision.py +56 -92
  20. deepeval/metrics/contextual_recall/contextual_recall.py +58 -85
  21. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +53 -83
  22. deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  23. deepeval/metrics/conversation_completeness/template.py +23 -3
  24. deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  25. deepeval/metrics/conversational_dag/nodes.py +66 -123
  26. deepeval/metrics/conversational_dag/templates.py +16 -0
  27. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  28. deepeval/metrics/dag/dag.py +10 -0
  29. deepeval/metrics/dag/nodes.py +63 -126
  30. deepeval/metrics/dag/templates.py +14 -0
  31. deepeval/metrics/exact_match/exact_match.py +9 -1
  32. deepeval/metrics/faithfulness/faithfulness.py +82 -136
  33. deepeval/metrics/g_eval/g_eval.py +87 -78
  34. deepeval/metrics/g_eval/template.py +18 -1
  35. deepeval/metrics/g_eval/utils.py +7 -6
  36. deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  37. deepeval/metrics/goal_accuracy/template.py +21 -3
  38. deepeval/metrics/hallucination/hallucination.py +60 -75
  39. deepeval/metrics/hallucination/template.py +13 -0
  40. deepeval/metrics/indicator.py +3 -6
  41. deepeval/metrics/json_correctness/json_correctness.py +40 -38
  42. deepeval/metrics/json_correctness/template.py +10 -0
  43. deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  44. deepeval/metrics/knowledge_retention/schema.py +9 -3
  45. deepeval/metrics/knowledge_retention/template.py +12 -0
  46. deepeval/metrics/mcp/mcp_task_completion.py +68 -38
  47. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
  48. deepeval/metrics/mcp/template.py +52 -0
  49. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  50. deepeval/metrics/mcp_use_metric/template.py +12 -0
  51. deepeval/metrics/misuse/misuse.py +77 -97
  52. deepeval/metrics/misuse/template.py +15 -0
  53. deepeval/metrics/multimodal_metrics/__init__.py +0 -1
  54. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +37 -38
  55. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +55 -76
  56. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +37 -38
  57. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +37 -38
  58. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +57 -76
  59. deepeval/metrics/non_advice/non_advice.py +79 -105
  60. deepeval/metrics/non_advice/template.py +12 -0
  61. deepeval/metrics/pattern_match/pattern_match.py +12 -4
  62. deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  63. deepeval/metrics/pii_leakage/template.py +14 -0
  64. deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  65. deepeval/metrics/plan_adherence/template.py +11 -0
  66. deepeval/metrics/plan_quality/plan_quality.py +63 -87
  67. deepeval/metrics/plan_quality/template.py +9 -0
  68. deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
  69. deepeval/metrics/prompt_alignment/template.py +12 -0
  70. deepeval/metrics/role_adherence/role_adherence.py +48 -71
  71. deepeval/metrics/role_adherence/template.py +14 -0
  72. deepeval/metrics/role_violation/role_violation.py +75 -108
  73. deepeval/metrics/role_violation/template.py +12 -0
  74. deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  75. deepeval/metrics/step_efficiency/template.py +11 -0
  76. deepeval/metrics/summarization/summarization.py +115 -183
  77. deepeval/metrics/summarization/template.py +19 -0
  78. deepeval/metrics/task_completion/task_completion.py +67 -73
  79. deepeval/metrics/tool_correctness/tool_correctness.py +43 -42
  80. deepeval/metrics/tool_use/tool_use.py +42 -66
  81. deepeval/metrics/topic_adherence/template.py +13 -0
  82. deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
  83. deepeval/metrics/toxicity/template.py +13 -0
  84. deepeval/metrics/toxicity/toxicity.py +80 -99
  85. deepeval/metrics/turn_contextual_precision/schema.py +3 -3
  86. deepeval/metrics/turn_contextual_precision/template.py +1 -1
  87. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +110 -68
  88. deepeval/metrics/turn_contextual_recall/schema.py +3 -3
  89. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +104 -61
  90. deepeval/metrics/turn_contextual_relevancy/schema.py +2 -2
  91. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +106 -65
  92. deepeval/metrics/turn_faithfulness/schema.py +1 -1
  93. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +104 -73
  94. deepeval/metrics/turn_relevancy/template.py +14 -0
  95. deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  96. deepeval/metrics/utils.py +145 -90
  97. deepeval/models/base_model.py +44 -6
  98. deepeval/models/embedding_models/azure_embedding_model.py +34 -12
  99. deepeval/models/embedding_models/local_embedding_model.py +22 -7
  100. deepeval/models/embedding_models/ollama_embedding_model.py +17 -6
  101. deepeval/models/embedding_models/openai_embedding_model.py +3 -2
  102. deepeval/models/llms/amazon_bedrock_model.py +226 -71
  103. deepeval/models/llms/anthropic_model.py +141 -47
  104. deepeval/models/llms/azure_model.py +167 -94
  105. deepeval/models/llms/constants.py +2032 -0
  106. deepeval/models/llms/deepseek_model.py +79 -29
  107. deepeval/models/llms/gemini_model.py +126 -67
  108. deepeval/models/llms/grok_model.py +125 -59
  109. deepeval/models/llms/kimi_model.py +126 -81
  110. deepeval/models/llms/litellm_model.py +92 -18
  111. deepeval/models/llms/local_model.py +114 -15
  112. deepeval/models/llms/ollama_model.py +97 -76
  113. deepeval/models/llms/openai_model.py +167 -310
  114. deepeval/models/llms/portkey_model.py +58 -16
  115. deepeval/models/llms/utils.py +5 -2
  116. deepeval/models/utils.py +60 -4
  117. deepeval/simulator/conversation_simulator.py +43 -0
  118. deepeval/simulator/template.py +13 -0
  119. deepeval/test_case/api.py +24 -45
  120. deepeval/test_case/arena_test_case.py +7 -2
  121. deepeval/test_case/conversational_test_case.py +55 -6
  122. deepeval/test_case/llm_test_case.py +60 -6
  123. deepeval/test_run/api.py +3 -0
  124. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -1
  125. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/RECORD +128 -132
  126. deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  127. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  128. deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  129. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -133
  130. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  131. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
  132. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
  133. {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
@@ -12,7 +12,7 @@ from deepeval.models.retry_policy import (
12
12
  sdk_retries_for,
13
13
  )
14
14
  from deepeval.constants import ProviderSlug as PS
15
-
15
+ from deepeval.utils import require_param
16
16
 
17
17
  # consistent retry rules
18
18
  retry_local = create_retry_decorator(PS.LOCAL)
@@ -31,16 +31,31 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
31
31
  settings = get_settings()
32
32
  if api_key is not None:
33
33
  # keep it secret, keep it safe from serializings, logging and alike
34
- self.api_key: SecretStr | None = SecretStr(api_key)
34
+ self.api_key: Optional[SecretStr] = SecretStr(api_key)
35
35
  else:
36
36
  self.api_key = get_settings().LOCAL_EMBEDDING_API_KEY
37
37
 
38
- self.base_url = (
39
- base_url
40
- or settings.LOCAL_EMBEDDING_BASE_URL
41
- and str(settings.LOCAL_EMBEDDING_BASE_URL)
42
- )
38
+ if base_url is not None:
39
+ base_url = str(base_url).rstrip("/")
40
+ elif settings.LOCAL_EMBEDDING_BASE_URL is not None:
41
+ base_url = str(settings.LOCAL_EMBEDDING_BASE_URL).rstrip("/")
42
+
43
43
  model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
44
+ # validation
45
+ model = require_param(
46
+ model,
47
+ provider_label="LocalEmbeddingModel",
48
+ env_var_name="LOCAL_EMBEDDING_MODEL_NAME",
49
+ param_hint="model",
50
+ )
51
+
52
+ self.base_url = require_param(
53
+ base_url,
54
+ provider_label="LocalEmbeddingModel",
55
+ env_var_name="LOCAL_EMBEDDING_BASE_URL",
56
+ param_hint="base_url",
57
+ )
58
+
44
59
  # Keep sanitized kwargs for client call to strip legacy keys
45
60
  self.kwargs = kwargs
46
61
  self.generation_kwargs = generation_kwargs or {}
@@ -10,7 +10,7 @@ from deepeval.models.retry_policy import (
10
10
  create_retry_decorator,
11
11
  )
12
12
  from deepeval.constants import ProviderSlug as PS
13
-
13
+ from deepeval.utils import require_param
14
14
 
15
15
  retry_ollama = create_retry_decorator(PS.OLLAMA)
16
16
 
@@ -37,12 +37,23 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
37
37
 
38
38
  settings = get_settings()
39
39
 
40
- self.base_url = (
41
- base_url
42
- or settings.LOCAL_EMBEDDING_BASE_URL
43
- and str(settings.LOCAL_EMBEDDING_BASE_URL)
44
- )
40
+ if base_url is not None:
41
+ self.base_url = str(base_url).rstrip("/")
42
+ elif settings.LOCAL_EMBEDDING_BASE_URL is not None:
43
+ self.base_url = str(settings.LOCAL_EMBEDDING_BASE_URL).rstrip("/")
44
+ else:
45
+ self.base_url = "http://localhost:11434"
46
+
45
47
  model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
48
+
49
+ # validation
50
+ model = require_param(
51
+ model,
52
+ provider_label="OllamaEmbeddingModel",
53
+ env_var_name="LOCAL_EMBEDDING_MODEL_NAME",
54
+ param_hint="model",
55
+ )
56
+
46
57
  # Keep sanitized kwargs for client call to strip legacy keys
47
58
  self.kwargs = normalized_kwargs
48
59
  self.generation_kwargs = generation_kwargs or {}
@@ -2,6 +2,7 @@ from typing import Dict, Optional, List
2
2
  from openai import OpenAI, AsyncOpenAI
3
3
  from pydantic import SecretStr
4
4
 
5
+ from deepeval.errors import DeepEvalError
5
6
  from deepeval.config.settings import get_settings
6
7
  from deepeval.models.utils import (
7
8
  require_secret_api_key,
@@ -51,13 +52,13 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
51
52
 
52
53
  if api_key is not None:
53
54
  # keep it secret, keep it safe from serializings, logging and alike
54
- self.api_key: SecretStr | None = SecretStr(api_key)
55
+ self.api_key: Optional[SecretStr] = SecretStr(api_key)
55
56
  else:
56
57
  self.api_key = get_settings().OPENAI_API_KEY
57
58
 
58
59
  model = model if model else default_openai_embedding_model
59
60
  if model not in valid_openai_embedding_models:
60
- raise ValueError(
61
+ raise DeepEvalError(
61
62
  f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
62
63
  )
63
64
  self.kwargs = normalized_kwargs
@@ -1,130 +1,285 @@
1
- from typing import Optional, Tuple, Union, Dict
1
+ import base64
2
+ from typing import Optional, Tuple, Union, Dict, List
2
3
  from contextlib import AsyncExitStack
3
- from pydantic import BaseModel
4
4
 
5
+ from pydantic import BaseModel, SecretStr
6
+
7
+ from deepeval.config.settings import get_settings
8
+ from deepeval.utils import (
9
+ require_dependency,
10
+ require_param,
11
+ )
5
12
  from deepeval.models.retry_policy import (
6
13
  create_retry_decorator,
7
14
  sdk_retries_for,
8
15
  )
16
+ from deepeval.test_case import MLLMImage
17
+ from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
9
18
  from deepeval.models import DeepEvalBaseLLM
19
+ from deepeval.models.llms.constants import BEDROCK_MODELS_DATA
10
20
  from deepeval.models.llms.utils import trim_and_load_json, safe_asyncio_run
11
21
  from deepeval.constants import ProviderSlug as PS
22
+ from deepeval.models.utils import (
23
+ require_costs,
24
+ normalize_kwargs_and_extract_aliases,
25
+ )
12
26
 
13
- # check aiobotocore availability
14
- try:
15
- from aiobotocore.session import get_session
16
- from botocore.config import Config
17
-
18
- aiobotocore_available = True
19
- except ImportError:
20
- aiobotocore_available = False
21
27
 
22
- # define retry policy
23
28
  retry_bedrock = create_retry_decorator(PS.BEDROCK)
24
29
 
25
-
26
- def _check_aiobotocore_available():
27
- if not aiobotocore_available:
28
- raise ImportError(
29
- "aiobotocore and botocore are required for this functionality. "
30
- "Install them via your package manager (e.g. pip install aiobotocore botocore)"
31
- )
30
+ _ALIAS_MAP = {
31
+ "model": ["model_id"],
32
+ "cost_per_input_token": ["input_token_cost"],
33
+ "cost_per_output_token": ["output_token_cost"],
34
+ }
32
35
 
33
36
 
34
37
  class AmazonBedrockModel(DeepEvalBaseLLM):
35
38
  def __init__(
36
39
  self,
37
- model_id: str,
38
- region_name: str,
40
+ model: Optional[str] = None,
39
41
  aws_access_key_id: Optional[str] = None,
40
42
  aws_secret_access_key: Optional[str] = None,
41
- input_token_cost: float = 0,
42
- output_token_cost: float = 0,
43
+ cost_per_input_token: Optional[float] = None,
44
+ cost_per_output_token: Optional[float] = None,
45
+ region: Optional[str] = None,
43
46
  generation_kwargs: Optional[Dict] = None,
44
47
  **kwargs,
45
48
  ):
46
- _check_aiobotocore_available()
47
- super().__init__(model_id)
48
-
49
- self.model_id = model_id
50
- self.region_name = region_name
51
- self.aws_access_key_id = aws_access_key_id
52
- self.aws_secret_access_key = aws_secret_access_key
53
- self.input_token_cost = input_token_cost
54
- self.output_token_cost = output_token_cost
55
-
56
- # prepare aiobotocore session, config, and async exit stack
57
- self._session = get_session()
49
+ settings = get_settings()
50
+
51
+ normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
52
+ "AmazonBedrockModel",
53
+ kwargs,
54
+ _ALIAS_MAP,
55
+ )
56
+
57
+ # Backwards compatibility for renamed params
58
+ if model is None and "model" in alias_values:
59
+ model = alias_values["model"]
60
+ if (
61
+ cost_per_input_token is None
62
+ and "cost_per_input_token" in alias_values
63
+ ):
64
+ cost_per_input_token = alias_values["cost_per_input_token"]
65
+ if (
66
+ cost_per_output_token is None
67
+ and "cost_per_output_token" in alias_values
68
+ ):
69
+ cost_per_output_token = alias_values["cost_per_output_token"]
70
+
71
+ # Secrets: prefer explicit args -> settings -> then AWS default chain
72
+ if aws_access_key_id is not None:
73
+ self.aws_access_key_id: Optional[SecretStr] = SecretStr(
74
+ aws_access_key_id
75
+ )
76
+ else:
77
+ self.aws_access_key_id = settings.AWS_ACCESS_KEY_ID
78
+
79
+ if aws_secret_access_key is not None:
80
+ self.aws_secret_access_key: Optional[SecretStr] = SecretStr(
81
+ aws_secret_access_key
82
+ )
83
+ else:
84
+ self.aws_secret_access_key = settings.AWS_SECRET_ACCESS_KEY
85
+
86
+ # Dependencies: aiobotocore & botocore
87
+ aiobotocore_session = require_dependency(
88
+ "aiobotocore.session",
89
+ provider_label="AmazonBedrockModel",
90
+ install_hint="Install it with `pip install aiobotocore`.",
91
+ )
92
+ self.botocore_module = require_dependency(
93
+ "botocore",
94
+ provider_label="AmazonBedrockModel",
95
+ install_hint="Install it with `pip install botocore`.",
96
+ )
97
+ self._session = aiobotocore_session.get_session()
58
98
  self._exit_stack = AsyncExitStack()
59
- self.kwargs = kwargs
99
+
100
+ # Defaults from settings
101
+ model = model or settings.AWS_BEDROCK_MODEL_NAME
102
+ region = region or settings.AWS_BEDROCK_REGION
103
+
104
+ cost_per_input_token = (
105
+ cost_per_input_token
106
+ if cost_per_input_token is not None
107
+ else settings.AWS_BEDROCK_COST_PER_INPUT_TOKEN
108
+ )
109
+ cost_per_output_token = (
110
+ cost_per_output_token
111
+ if cost_per_output_token is not None
112
+ else settings.AWS_BEDROCK_COST_PER_OUTPUT_TOKEN
113
+ )
114
+
115
+ # Required params
116
+ model = require_param(
117
+ model,
118
+ provider_label="AmazonBedrockModel",
119
+ env_var_name="AWS_BEDROCK_MODEL_NAME",
120
+ param_hint="model",
121
+ )
122
+ region = require_param(
123
+ region,
124
+ provider_label="AmazonBedrockModel",
125
+ env_var_name="AWS_BEDROCK_REGION",
126
+ param_hint="region",
127
+ )
128
+
129
+ self.model_data = BEDROCK_MODELS_DATA.get(model)
130
+ cost_per_input_token, cost_per_output_token = require_costs(
131
+ self.model_data,
132
+ model,
133
+ "AWS_BEDROCK_COST_PER_INPUT_TOKEN",
134
+ "AWS_BEDROCK_COST_PER_OUTPUT_TOKEN",
135
+ cost_per_input_token,
136
+ cost_per_output_token,
137
+ )
138
+
139
+ # Final attributes
140
+ self.region = region
141
+ self.cost_per_input_token = float(cost_per_input_token or 0.0)
142
+ self.cost_per_output_token = float(cost_per_output_token or 0.0)
143
+
144
+ self.kwargs = normalized_kwargs
60
145
  self.generation_kwargs = generation_kwargs or {}
61
146
  self._client = None
62
147
  self._sdk_retry_mode: Optional[bool] = None
63
148
 
149
+ super().__init__(model)
150
+
64
151
  ###############################################
65
152
  # Generate functions
66
153
  ###############################################
67
154
 
68
155
  def generate(
69
156
  self, prompt: str, schema: Optional[BaseModel] = None
70
- ) -> Tuple[Union[str, Dict], float]:
157
+ ) -> Tuple[Union[str, BaseModel], float]:
71
158
  return safe_asyncio_run(self.a_generate(prompt, schema))
72
159
 
73
160
  @retry_bedrock
74
161
  async def a_generate(
75
162
  self, prompt: str, schema: Optional[BaseModel] = None
76
- ) -> Tuple[Union[str, Dict], float]:
77
-
78
- try:
163
+ ) -> Tuple[Union[str, BaseModel], float]:
164
+ if check_if_multimodal(prompt):
165
+ prompt = convert_to_multi_modal_array(input=prompt)
166
+ payload = self.generate_payload(prompt)
167
+ else:
79
168
  payload = self.get_converse_request_body(prompt)
80
- client = await self._ensure_client()
81
- response = await client.converse(
82
- modelId=self.model_id,
83
- messages=payload["messages"],
84
- inferenceConfig=payload["inferenceConfig"],
85
- )
86
- message = response["output"]["message"]["content"][0]["text"]
87
- cost = self.calculate_cost(
88
- response["usage"]["inputTokens"],
89
- response["usage"]["outputTokens"],
90
- )
91
- if schema is None:
92
- return message, cost
93
- else:
94
- json_output = trim_and_load_json(message)
95
- return schema.model_validate(json_output), cost
96
- finally:
97
- await self.close()
169
+
170
+ payload = self.get_converse_request_body(prompt)
171
+ client = await self._ensure_client()
172
+ response = await client.converse(
173
+ modelId=self.get_model_name(),
174
+ messages=payload["messages"],
175
+ inferenceConfig=payload["inferenceConfig"],
176
+ )
177
+ message = response["output"]["message"]["content"][0]["text"]
178
+ cost = self.calculate_cost(
179
+ response["usage"]["inputTokens"],
180
+ response["usage"]["outputTokens"],
181
+ )
182
+ if schema is None:
183
+ return message, cost
184
+ else:
185
+ json_output = trim_and_load_json(message)
186
+ return schema.model_validate(json_output), cost
187
+
188
+ def generate_payload(
189
+ self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
190
+ ):
191
+ multimodal_input = [] if multimodal_input is None else multimodal_input
192
+ content = []
193
+ for element in multimodal_input:
194
+ if isinstance(element, str):
195
+ content.append({"text": element})
196
+ elif isinstance(element, MLLMImage):
197
+ # Bedrock doesn't support external URLs - must convert everything to bytes
198
+ element.ensure_images_loaded()
199
+
200
+ image_format = (
201
+ (element.mimeType or "image/jpeg").split("/")[-1].upper()
202
+ )
203
+ image_format = "JPEG" if image_format == "JPG" else image_format
204
+
205
+ try:
206
+ image_raw_bytes = base64.b64decode(element.dataBase64)
207
+ except Exception:
208
+ raise ValueError(
209
+ f"Invalid base64 data in MLLMImage: {element._id}"
210
+ )
211
+
212
+ content.append(
213
+ {
214
+ "image": {
215
+ "format": image_format,
216
+ "source": {"bytes": image_raw_bytes},
217
+ }
218
+ }
219
+ )
220
+
221
+ return {
222
+ "messages": [{"role": "user", "content": content}],
223
+ "inferenceConfig": {
224
+ **self.generation_kwargs,
225
+ },
226
+ }
227
+
228
+ #########################
229
+ # Capabilities #
230
+ #########################
231
+
232
+ def supports_log_probs(self) -> Union[bool, None]:
233
+ return self.model_data.supports_log_probs
234
+
235
+ def supports_temperature(self) -> Union[bool, None]:
236
+ return self.model_data.supports_temperature
237
+
238
+ def supports_multimodal(self) -> Union[bool, None]:
239
+ return self.model_data.supports_multimodal
240
+
241
+ def supports_structured_outputs(self) -> Union[bool, None]:
242
+ return self.model_data.supports_structured_outputs
243
+
244
+ def supports_json_mode(self) -> Union[bool, None]:
245
+ return self.model_data.supports_json
98
246
 
99
247
  ###############################################
100
248
  # Client management
101
249
  ###############################################
102
250
 
103
251
  async def _ensure_client(self):
252
+
104
253
  use_sdk = sdk_retries_for(PS.BEDROCK)
105
254
 
106
255
  # only rebuild if client is missing or the sdk retry mode changes
107
256
  if self._client is None or self._sdk_retry_mode != use_sdk:
108
- # Close any previous
109
- if self._client is not None:
110
- await self._exit_stack.aclose()
111
- self._client = None
112
257
 
113
258
  # create retry config for botocore
114
259
  retries_config = {"max_attempts": (5 if use_sdk else 1)}
115
260
  if use_sdk:
116
261
  retries_config["mode"] = "adaptive"
117
262
 
263
+ Config = self.botocore_module.config.Config
118
264
  config = Config(retries=retries_config)
119
265
 
120
- cm = self._session.create_client(
121
- "bedrock-runtime",
122
- region_name=self.region_name,
123
- aws_access_key_id=self.aws_access_key_id,
124
- aws_secret_access_key=self.aws_secret_access_key,
125
- config=config,
266
+ client_kwargs = {
267
+ "region_name": self.region,
268
+ "config": config,
126
269
  **self.kwargs,
127
- )
270
+ }
271
+
272
+ if self.aws_access_key_id is not None:
273
+ client_kwargs["aws_access_key_id"] = (
274
+ self.aws_access_key_id.get_secret_value()
275
+ )
276
+ if self.aws_secret_access_key is not None:
277
+ client_kwargs["aws_secret_access_key"] = (
278
+ self.aws_secret_access_key.get_secret_value()
279
+ )
280
+
281
+ cm = self._session.create_client("bedrock-runtime", **client_kwargs)
282
+
128
283
  self._client = await self._exit_stack.enter_async_context(cm)
129
284
  self._sdk_retry_mode = use_sdk
130
285
 
@@ -149,12 +304,12 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
149
304
 
150
305
  def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
151
306
  return (
152
- input_tokens * self.input_token_cost
153
- + output_tokens * self.output_token_cost
307
+ input_tokens * self.cost_per_input_token
308
+ + output_tokens * self.cost_per_output_token
154
309
  )
155
310
 
156
311
  def load_model(self):
157
312
  pass
158
313
 
159
314
  def get_model_name(self) -> str:
160
- return self.model_id
315
+ return self.name