deepeval 3.7.5__py3-none-any.whl → 3.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +35 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +106 -21
- deepeval/evaluate/evaluate.py +0 -3
- deepeval/evaluate/execute.py +10 -222
- deepeval/evaluate/utils.py +6 -30
- deepeval/key_handler.py +3 -0
- deepeval/metrics/__init__.py +0 -4
- deepeval/metrics/answer_relevancy/answer_relevancy.py +89 -132
- deepeval/metrics/answer_relevancy/template.py +102 -179
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +13 -41
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +56 -92
- deepeval/metrics/contextual_recall/contextual_recall.py +58 -85
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +53 -83
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +14 -0
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +82 -136
- deepeval/metrics/g_eval/g_eval.py +87 -78
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +3 -6
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +68 -38
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
- deepeval/metrics/mcp/template.py +52 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -1
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +37 -38
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +55 -76
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +37 -38
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +37 -38
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +57 -76
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +43 -42
- deepeval/metrics/tool_use/tool_use.py +42 -66
- deepeval/metrics/topic_adherence/template.py +13 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +3 -3
- deepeval/metrics/turn_contextual_precision/template.py +1 -1
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +110 -68
- deepeval/metrics/turn_contextual_recall/schema.py +3 -3
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +104 -61
- deepeval/metrics/turn_contextual_relevancy/schema.py +2 -2
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +106 -65
- deepeval/metrics/turn_faithfulness/schema.py +1 -1
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +104 -73
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +145 -90
- deepeval/models/base_model.py +44 -6
- deepeval/models/embedding_models/azure_embedding_model.py +34 -12
- deepeval/models/embedding_models/local_embedding_model.py +22 -7
- deepeval/models/embedding_models/ollama_embedding_model.py +17 -6
- deepeval/models/embedding_models/openai_embedding_model.py +3 -2
- deepeval/models/llms/amazon_bedrock_model.py +226 -71
- deepeval/models/llms/anthropic_model.py +141 -47
- deepeval/models/llms/azure_model.py +167 -94
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +79 -29
- deepeval/models/llms/gemini_model.py +126 -67
- deepeval/models/llms/grok_model.py +125 -59
- deepeval/models/llms/kimi_model.py +126 -81
- deepeval/models/llms/litellm_model.py +92 -18
- deepeval/models/llms/local_model.py +114 -15
- deepeval/models/llms/ollama_model.py +97 -76
- deepeval/models/llms/openai_model.py +167 -310
- deepeval/models/llms/portkey_model.py +58 -16
- deepeval/models/llms/utils.py +5 -2
- deepeval/models/utils.py +60 -4
- deepeval/simulator/conversation_simulator.py +43 -0
- deepeval/simulator/template.py +13 -0
- deepeval/test_case/api.py +24 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +55 -6
- deepeval/test_case/llm_test_case.py +60 -6
- deepeval/test_run/api.py +3 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -1
- {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/RECORD +128 -132
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -133
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
- {deepeval-3.7.5.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
|
@@ -12,7 +12,7 @@ from deepeval.models.retry_policy import (
|
|
|
12
12
|
sdk_retries_for,
|
|
13
13
|
)
|
|
14
14
|
from deepeval.constants import ProviderSlug as PS
|
|
15
|
-
|
|
15
|
+
from deepeval.utils import require_param
|
|
16
16
|
|
|
17
17
|
# consistent retry rules
|
|
18
18
|
retry_local = create_retry_decorator(PS.LOCAL)
|
|
@@ -31,16 +31,31 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
31
31
|
settings = get_settings()
|
|
32
32
|
if api_key is not None:
|
|
33
33
|
# keep it secret, keep it safe from serializings, logging and alike
|
|
34
|
-
self.api_key: SecretStr
|
|
34
|
+
self.api_key: Optional[SecretStr] = SecretStr(api_key)
|
|
35
35
|
else:
|
|
36
36
|
self.api_key = get_settings().LOCAL_EMBEDDING_API_KEY
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
base_url
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
38
|
+
if base_url is not None:
|
|
39
|
+
base_url = str(base_url).rstrip("/")
|
|
40
|
+
elif settings.LOCAL_EMBEDDING_BASE_URL is not None:
|
|
41
|
+
base_url = str(settings.LOCAL_EMBEDDING_BASE_URL).rstrip("/")
|
|
42
|
+
|
|
43
43
|
model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
|
|
44
|
+
# validation
|
|
45
|
+
model = require_param(
|
|
46
|
+
model,
|
|
47
|
+
provider_label="LocalEmbeddingModel",
|
|
48
|
+
env_var_name="LOCAL_EMBEDDING_MODEL_NAME",
|
|
49
|
+
param_hint="model",
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
self.base_url = require_param(
|
|
53
|
+
base_url,
|
|
54
|
+
provider_label="LocalEmbeddingModel",
|
|
55
|
+
env_var_name="LOCAL_EMBEDDING_BASE_URL",
|
|
56
|
+
param_hint="base_url",
|
|
57
|
+
)
|
|
58
|
+
|
|
44
59
|
# Keep sanitized kwargs for client call to strip legacy keys
|
|
45
60
|
self.kwargs = kwargs
|
|
46
61
|
self.generation_kwargs = generation_kwargs or {}
|
|
@@ -10,7 +10,7 @@ from deepeval.models.retry_policy import (
|
|
|
10
10
|
create_retry_decorator,
|
|
11
11
|
)
|
|
12
12
|
from deepeval.constants import ProviderSlug as PS
|
|
13
|
-
|
|
13
|
+
from deepeval.utils import require_param
|
|
14
14
|
|
|
15
15
|
retry_ollama = create_retry_decorator(PS.OLLAMA)
|
|
16
16
|
|
|
@@ -37,12 +37,23 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
37
37
|
|
|
38
38
|
settings = get_settings()
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
base_url
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
40
|
+
if base_url is not None:
|
|
41
|
+
self.base_url = str(base_url).rstrip("/")
|
|
42
|
+
elif settings.LOCAL_EMBEDDING_BASE_URL is not None:
|
|
43
|
+
self.base_url = str(settings.LOCAL_EMBEDDING_BASE_URL).rstrip("/")
|
|
44
|
+
else:
|
|
45
|
+
self.base_url = "http://localhost:11434"
|
|
46
|
+
|
|
45
47
|
model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
|
|
48
|
+
|
|
49
|
+
# validation
|
|
50
|
+
model = require_param(
|
|
51
|
+
model,
|
|
52
|
+
provider_label="OllamaEmbeddingModel",
|
|
53
|
+
env_var_name="LOCAL_EMBEDDING_MODEL_NAME",
|
|
54
|
+
param_hint="model",
|
|
55
|
+
)
|
|
56
|
+
|
|
46
57
|
# Keep sanitized kwargs for client call to strip legacy keys
|
|
47
58
|
self.kwargs = normalized_kwargs
|
|
48
59
|
self.generation_kwargs = generation_kwargs or {}
|
|
@@ -2,6 +2,7 @@ from typing import Dict, Optional, List
|
|
|
2
2
|
from openai import OpenAI, AsyncOpenAI
|
|
3
3
|
from pydantic import SecretStr
|
|
4
4
|
|
|
5
|
+
from deepeval.errors import DeepEvalError
|
|
5
6
|
from deepeval.config.settings import get_settings
|
|
6
7
|
from deepeval.models.utils import (
|
|
7
8
|
require_secret_api_key,
|
|
@@ -51,13 +52,13 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
51
52
|
|
|
52
53
|
if api_key is not None:
|
|
53
54
|
# keep it secret, keep it safe from serializings, logging and alike
|
|
54
|
-
self.api_key: SecretStr
|
|
55
|
+
self.api_key: Optional[SecretStr] = SecretStr(api_key)
|
|
55
56
|
else:
|
|
56
57
|
self.api_key = get_settings().OPENAI_API_KEY
|
|
57
58
|
|
|
58
59
|
model = model if model else default_openai_embedding_model
|
|
59
60
|
if model not in valid_openai_embedding_models:
|
|
60
|
-
raise
|
|
61
|
+
raise DeepEvalError(
|
|
61
62
|
f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
|
|
62
63
|
)
|
|
63
64
|
self.kwargs = normalized_kwargs
|
|
@@ -1,130 +1,285 @@
|
|
|
1
|
-
|
|
1
|
+
import base64
|
|
2
|
+
from typing import Optional, Tuple, Union, Dict, List
|
|
2
3
|
from contextlib import AsyncExitStack
|
|
3
|
-
from pydantic import BaseModel
|
|
4
4
|
|
|
5
|
+
from pydantic import BaseModel, SecretStr
|
|
6
|
+
|
|
7
|
+
from deepeval.config.settings import get_settings
|
|
8
|
+
from deepeval.utils import (
|
|
9
|
+
require_dependency,
|
|
10
|
+
require_param,
|
|
11
|
+
)
|
|
5
12
|
from deepeval.models.retry_policy import (
|
|
6
13
|
create_retry_decorator,
|
|
7
14
|
sdk_retries_for,
|
|
8
15
|
)
|
|
16
|
+
from deepeval.test_case import MLLMImage
|
|
17
|
+
from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
|
|
9
18
|
from deepeval.models import DeepEvalBaseLLM
|
|
19
|
+
from deepeval.models.llms.constants import BEDROCK_MODELS_DATA
|
|
10
20
|
from deepeval.models.llms.utils import trim_and_load_json, safe_asyncio_run
|
|
11
21
|
from deepeval.constants import ProviderSlug as PS
|
|
22
|
+
from deepeval.models.utils import (
|
|
23
|
+
require_costs,
|
|
24
|
+
normalize_kwargs_and_extract_aliases,
|
|
25
|
+
)
|
|
12
26
|
|
|
13
|
-
# check aiobotocore availability
|
|
14
|
-
try:
|
|
15
|
-
from aiobotocore.session import get_session
|
|
16
|
-
from botocore.config import Config
|
|
17
|
-
|
|
18
|
-
aiobotocore_available = True
|
|
19
|
-
except ImportError:
|
|
20
|
-
aiobotocore_available = False
|
|
21
27
|
|
|
22
|
-
# define retry policy
|
|
23
28
|
retry_bedrock = create_retry_decorator(PS.BEDROCK)
|
|
24
29
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
"Install them via your package manager (e.g. pip install aiobotocore botocore)"
|
|
31
|
-
)
|
|
30
|
+
_ALIAS_MAP = {
|
|
31
|
+
"model": ["model_id"],
|
|
32
|
+
"cost_per_input_token": ["input_token_cost"],
|
|
33
|
+
"cost_per_output_token": ["output_token_cost"],
|
|
34
|
+
}
|
|
32
35
|
|
|
33
36
|
|
|
34
37
|
class AmazonBedrockModel(DeepEvalBaseLLM):
|
|
35
38
|
def __init__(
|
|
36
39
|
self,
|
|
37
|
-
|
|
38
|
-
region_name: str,
|
|
40
|
+
model: Optional[str] = None,
|
|
39
41
|
aws_access_key_id: Optional[str] = None,
|
|
40
42
|
aws_secret_access_key: Optional[str] = None,
|
|
41
|
-
|
|
42
|
-
|
|
43
|
+
cost_per_input_token: Optional[float] = None,
|
|
44
|
+
cost_per_output_token: Optional[float] = None,
|
|
45
|
+
region: Optional[str] = None,
|
|
43
46
|
generation_kwargs: Optional[Dict] = None,
|
|
44
47
|
**kwargs,
|
|
45
48
|
):
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
49
|
+
settings = get_settings()
|
|
50
|
+
|
|
51
|
+
normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
|
|
52
|
+
"AmazonBedrockModel",
|
|
53
|
+
kwargs,
|
|
54
|
+
_ALIAS_MAP,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# Backwards compatibility for renamed params
|
|
58
|
+
if model is None and "model" in alias_values:
|
|
59
|
+
model = alias_values["model"]
|
|
60
|
+
if (
|
|
61
|
+
cost_per_input_token is None
|
|
62
|
+
and "cost_per_input_token" in alias_values
|
|
63
|
+
):
|
|
64
|
+
cost_per_input_token = alias_values["cost_per_input_token"]
|
|
65
|
+
if (
|
|
66
|
+
cost_per_output_token is None
|
|
67
|
+
and "cost_per_output_token" in alias_values
|
|
68
|
+
):
|
|
69
|
+
cost_per_output_token = alias_values["cost_per_output_token"]
|
|
70
|
+
|
|
71
|
+
# Secrets: prefer explicit args -> settings -> then AWS default chain
|
|
72
|
+
if aws_access_key_id is not None:
|
|
73
|
+
self.aws_access_key_id: Optional[SecretStr] = SecretStr(
|
|
74
|
+
aws_access_key_id
|
|
75
|
+
)
|
|
76
|
+
else:
|
|
77
|
+
self.aws_access_key_id = settings.AWS_ACCESS_KEY_ID
|
|
78
|
+
|
|
79
|
+
if aws_secret_access_key is not None:
|
|
80
|
+
self.aws_secret_access_key: Optional[SecretStr] = SecretStr(
|
|
81
|
+
aws_secret_access_key
|
|
82
|
+
)
|
|
83
|
+
else:
|
|
84
|
+
self.aws_secret_access_key = settings.AWS_SECRET_ACCESS_KEY
|
|
85
|
+
|
|
86
|
+
# Dependencies: aiobotocore & botocore
|
|
87
|
+
aiobotocore_session = require_dependency(
|
|
88
|
+
"aiobotocore.session",
|
|
89
|
+
provider_label="AmazonBedrockModel",
|
|
90
|
+
install_hint="Install it with `pip install aiobotocore`.",
|
|
91
|
+
)
|
|
92
|
+
self.botocore_module = require_dependency(
|
|
93
|
+
"botocore",
|
|
94
|
+
provider_label="AmazonBedrockModel",
|
|
95
|
+
install_hint="Install it with `pip install botocore`.",
|
|
96
|
+
)
|
|
97
|
+
self._session = aiobotocore_session.get_session()
|
|
58
98
|
self._exit_stack = AsyncExitStack()
|
|
59
|
-
|
|
99
|
+
|
|
100
|
+
# Defaults from settings
|
|
101
|
+
model = model or settings.AWS_BEDROCK_MODEL_NAME
|
|
102
|
+
region = region or settings.AWS_BEDROCK_REGION
|
|
103
|
+
|
|
104
|
+
cost_per_input_token = (
|
|
105
|
+
cost_per_input_token
|
|
106
|
+
if cost_per_input_token is not None
|
|
107
|
+
else settings.AWS_BEDROCK_COST_PER_INPUT_TOKEN
|
|
108
|
+
)
|
|
109
|
+
cost_per_output_token = (
|
|
110
|
+
cost_per_output_token
|
|
111
|
+
if cost_per_output_token is not None
|
|
112
|
+
else settings.AWS_BEDROCK_COST_PER_OUTPUT_TOKEN
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# Required params
|
|
116
|
+
model = require_param(
|
|
117
|
+
model,
|
|
118
|
+
provider_label="AmazonBedrockModel",
|
|
119
|
+
env_var_name="AWS_BEDROCK_MODEL_NAME",
|
|
120
|
+
param_hint="model",
|
|
121
|
+
)
|
|
122
|
+
region = require_param(
|
|
123
|
+
region,
|
|
124
|
+
provider_label="AmazonBedrockModel",
|
|
125
|
+
env_var_name="AWS_BEDROCK_REGION",
|
|
126
|
+
param_hint="region",
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
self.model_data = BEDROCK_MODELS_DATA.get(model)
|
|
130
|
+
cost_per_input_token, cost_per_output_token = require_costs(
|
|
131
|
+
self.model_data,
|
|
132
|
+
model,
|
|
133
|
+
"AWS_BEDROCK_COST_PER_INPUT_TOKEN",
|
|
134
|
+
"AWS_BEDROCK_COST_PER_OUTPUT_TOKEN",
|
|
135
|
+
cost_per_input_token,
|
|
136
|
+
cost_per_output_token,
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
# Final attributes
|
|
140
|
+
self.region = region
|
|
141
|
+
self.cost_per_input_token = float(cost_per_input_token or 0.0)
|
|
142
|
+
self.cost_per_output_token = float(cost_per_output_token or 0.0)
|
|
143
|
+
|
|
144
|
+
self.kwargs = normalized_kwargs
|
|
60
145
|
self.generation_kwargs = generation_kwargs or {}
|
|
61
146
|
self._client = None
|
|
62
147
|
self._sdk_retry_mode: Optional[bool] = None
|
|
63
148
|
|
|
149
|
+
super().__init__(model)
|
|
150
|
+
|
|
64
151
|
###############################################
|
|
65
152
|
# Generate functions
|
|
66
153
|
###############################################
|
|
67
154
|
|
|
68
155
|
def generate(
|
|
69
156
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
70
|
-
) -> Tuple[Union[str,
|
|
157
|
+
) -> Tuple[Union[str, BaseModel], float]:
|
|
71
158
|
return safe_asyncio_run(self.a_generate(prompt, schema))
|
|
72
159
|
|
|
73
160
|
@retry_bedrock
|
|
74
161
|
async def a_generate(
|
|
75
162
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
76
|
-
) -> Tuple[Union[str,
|
|
77
|
-
|
|
78
|
-
|
|
163
|
+
) -> Tuple[Union[str, BaseModel], float]:
|
|
164
|
+
if check_if_multimodal(prompt):
|
|
165
|
+
prompt = convert_to_multi_modal_array(input=prompt)
|
|
166
|
+
payload = self.generate_payload(prompt)
|
|
167
|
+
else:
|
|
79
168
|
payload = self.get_converse_request_body(prompt)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
169
|
+
|
|
170
|
+
payload = self.get_converse_request_body(prompt)
|
|
171
|
+
client = await self._ensure_client()
|
|
172
|
+
response = await client.converse(
|
|
173
|
+
modelId=self.get_model_name(),
|
|
174
|
+
messages=payload["messages"],
|
|
175
|
+
inferenceConfig=payload["inferenceConfig"],
|
|
176
|
+
)
|
|
177
|
+
message = response["output"]["message"]["content"][0]["text"]
|
|
178
|
+
cost = self.calculate_cost(
|
|
179
|
+
response["usage"]["inputTokens"],
|
|
180
|
+
response["usage"]["outputTokens"],
|
|
181
|
+
)
|
|
182
|
+
if schema is None:
|
|
183
|
+
return message, cost
|
|
184
|
+
else:
|
|
185
|
+
json_output = trim_and_load_json(message)
|
|
186
|
+
return schema.model_validate(json_output), cost
|
|
187
|
+
|
|
188
|
+
def generate_payload(
|
|
189
|
+
self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
|
|
190
|
+
):
|
|
191
|
+
multimodal_input = [] if multimodal_input is None else multimodal_input
|
|
192
|
+
content = []
|
|
193
|
+
for element in multimodal_input:
|
|
194
|
+
if isinstance(element, str):
|
|
195
|
+
content.append({"text": element})
|
|
196
|
+
elif isinstance(element, MLLMImage):
|
|
197
|
+
# Bedrock doesn't support external URLs - must convert everything to bytes
|
|
198
|
+
element.ensure_images_loaded()
|
|
199
|
+
|
|
200
|
+
image_format = (
|
|
201
|
+
(element.mimeType or "image/jpeg").split("/")[-1].upper()
|
|
202
|
+
)
|
|
203
|
+
image_format = "JPEG" if image_format == "JPG" else image_format
|
|
204
|
+
|
|
205
|
+
try:
|
|
206
|
+
image_raw_bytes = base64.b64decode(element.dataBase64)
|
|
207
|
+
except Exception:
|
|
208
|
+
raise ValueError(
|
|
209
|
+
f"Invalid base64 data in MLLMImage: {element._id}"
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
content.append(
|
|
213
|
+
{
|
|
214
|
+
"image": {
|
|
215
|
+
"format": image_format,
|
|
216
|
+
"source": {"bytes": image_raw_bytes},
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
return {
|
|
222
|
+
"messages": [{"role": "user", "content": content}],
|
|
223
|
+
"inferenceConfig": {
|
|
224
|
+
**self.generation_kwargs,
|
|
225
|
+
},
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
#########################
|
|
229
|
+
# Capabilities #
|
|
230
|
+
#########################
|
|
231
|
+
|
|
232
|
+
def supports_log_probs(self) -> Union[bool, None]:
|
|
233
|
+
return self.model_data.supports_log_probs
|
|
234
|
+
|
|
235
|
+
def supports_temperature(self) -> Union[bool, None]:
|
|
236
|
+
return self.model_data.supports_temperature
|
|
237
|
+
|
|
238
|
+
def supports_multimodal(self) -> Union[bool, None]:
|
|
239
|
+
return self.model_data.supports_multimodal
|
|
240
|
+
|
|
241
|
+
def supports_structured_outputs(self) -> Union[bool, None]:
|
|
242
|
+
return self.model_data.supports_structured_outputs
|
|
243
|
+
|
|
244
|
+
def supports_json_mode(self) -> Union[bool, None]:
|
|
245
|
+
return self.model_data.supports_json
|
|
98
246
|
|
|
99
247
|
###############################################
|
|
100
248
|
# Client management
|
|
101
249
|
###############################################
|
|
102
250
|
|
|
103
251
|
async def _ensure_client(self):
|
|
252
|
+
|
|
104
253
|
use_sdk = sdk_retries_for(PS.BEDROCK)
|
|
105
254
|
|
|
106
255
|
# only rebuild if client is missing or the sdk retry mode changes
|
|
107
256
|
if self._client is None or self._sdk_retry_mode != use_sdk:
|
|
108
|
-
# Close any previous
|
|
109
|
-
if self._client is not None:
|
|
110
|
-
await self._exit_stack.aclose()
|
|
111
|
-
self._client = None
|
|
112
257
|
|
|
113
258
|
# create retry config for botocore
|
|
114
259
|
retries_config = {"max_attempts": (5 if use_sdk else 1)}
|
|
115
260
|
if use_sdk:
|
|
116
261
|
retries_config["mode"] = "adaptive"
|
|
117
262
|
|
|
263
|
+
Config = self.botocore_module.config.Config
|
|
118
264
|
config = Config(retries=retries_config)
|
|
119
265
|
|
|
120
|
-
|
|
121
|
-
"
|
|
122
|
-
|
|
123
|
-
aws_access_key_id=self.aws_access_key_id,
|
|
124
|
-
aws_secret_access_key=self.aws_secret_access_key,
|
|
125
|
-
config=config,
|
|
266
|
+
client_kwargs = {
|
|
267
|
+
"region_name": self.region,
|
|
268
|
+
"config": config,
|
|
126
269
|
**self.kwargs,
|
|
127
|
-
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
if self.aws_access_key_id is not None:
|
|
273
|
+
client_kwargs["aws_access_key_id"] = (
|
|
274
|
+
self.aws_access_key_id.get_secret_value()
|
|
275
|
+
)
|
|
276
|
+
if self.aws_secret_access_key is not None:
|
|
277
|
+
client_kwargs["aws_secret_access_key"] = (
|
|
278
|
+
self.aws_secret_access_key.get_secret_value()
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
cm = self._session.create_client("bedrock-runtime", **client_kwargs)
|
|
282
|
+
|
|
128
283
|
self._client = await self._exit_stack.enter_async_context(cm)
|
|
129
284
|
self._sdk_retry_mode = use_sdk
|
|
130
285
|
|
|
@@ -149,12 +304,12 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
|
|
|
149
304
|
|
|
150
305
|
def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
|
|
151
306
|
return (
|
|
152
|
-
input_tokens * self.
|
|
153
|
-
+ output_tokens * self.
|
|
307
|
+
input_tokens * self.cost_per_input_token
|
|
308
|
+
+ output_tokens * self.cost_per_output_token
|
|
154
309
|
)
|
|
155
310
|
|
|
156
311
|
def load_model(self):
|
|
157
312
|
pass
|
|
158
313
|
|
|
159
314
|
def get_model_name(self) -> str:
|
|
160
|
-
return self.
|
|
315
|
+
return self.name
|