deepeval 3.7.4__py3-none-any.whl → 3.7.6__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +35 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +139 -2
- deepeval/evaluate/evaluate.py +16 -11
- deepeval/evaluate/execute.py +13 -181
- deepeval/evaluate/utils.py +6 -26
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/key_handler.py +3 -0
- deepeval/metrics/__init__.py +14 -16
- deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
- deepeval/metrics/answer_relevancy/template.py +22 -3
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +13 -44
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +16 -2
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +138 -149
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/g_eval.py +87 -78
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +7 -10
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +68 -38
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
- deepeval/metrics/mcp/template.py +52 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -19
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
- deepeval/metrics/tool_use/tool_use.py +42 -66
- deepeval/metrics/topic_adherence/template.py +13 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +158 -122
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +49 -33
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +79 -33
- deepeval/models/embedding_models/local_embedding_model.py +39 -20
- deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
- deepeval/models/embedding_models/openai_embedding_model.py +42 -22
- deepeval/models/llms/amazon_bedrock_model.py +226 -72
- deepeval/models/llms/anthropic_model.py +178 -63
- deepeval/models/llms/azure_model.py +218 -60
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +95 -40
- deepeval/models/llms/gemini_model.py +209 -64
- deepeval/models/llms/grok_model.py +139 -68
- deepeval/models/llms/kimi_model.py +140 -90
- deepeval/models/llms/litellm_model.py +131 -37
- deepeval/models/llms/local_model.py +125 -21
- deepeval/models/llms/ollama_model.py +147 -24
- deepeval/models/llms/openai_model.py +222 -269
- deepeval/models/llms/portkey_model.py +81 -22
- deepeval/models/llms/utils.py +8 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +106 -5
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
- deepeval/{optimization → optimizer}/configs.py +5 -8
- deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/{optimization → optimizer}/utils.py +47 -165
- deepeval/prompt/prompt.py +5 -9
- deepeval/simulator/conversation_simulator.py +43 -0
- deepeval/simulator/template.py +13 -0
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +26 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +68 -1
- deepeval/test_case/llm_test_case.py +206 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +18 -14
- deepeval/test_run/test_run.py +3 -3
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +65 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -343
- deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval/models/mlllms/ollama_model.py +0 -175
- deepeval/models/mlllms/openai_model.py +0 -309
- deepeval/optimization/__init__.py +0 -13
- deepeval/optimization/adapters/__init__.py +0 -2
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval/optimization/aggregates.py +0 -14
- deepeval/optimization/copro/configs.py +0 -31
- deepeval/optimization/gepa/__init__.py +0 -7
- deepeval/optimization/gepa/configs.py +0 -115
- deepeval/optimization/miprov2/configs.py +0 -134
- deepeval/optimization/miprov2/loop.py +0 -785
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval/optimization/policies/__init__.py +0 -16
- deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +0 -33
- deepeval/optimization/types.py +0 -361
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0

deepeval/models/embedding_models/openai_embedding_model.py
(fragments the diff renderer elided are marked with "…")

```diff
@@ -2,8 +2,12 @@ from typing import Dict, Optional, List
 from openai import OpenAI, AsyncOpenAI
 from pydantic import SecretStr
 
+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
-from deepeval.models.utils import …
+from deepeval.models.utils import (
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.models import DeepEvalBaseEmbeddingModel
 from deepeval.models.retry_policy import (
     create_retry_decorator,
@@ -19,37 +23,53 @@ valid_openai_embedding_models = [
     "text-embedding-3-large",
     "text-embedding-ada-002",
 ]
+
 default_openai_embedding_model = "text-embedding-3-small"
 
+_ALIAS_MAP = {
+    "api_key": ["openai_api_key"],
+}
+
 
 class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
 
     def __init__(
         self,
         model: Optional[str] = None,
-        …
+        api_key: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
-        **…
+        **kwargs,
     ):
-        …
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "OpenAIEmbeddingModel",
+            kwargs,
+            _ALIAS_MAP,
+        )
+
+        # re-map depricated keywords to re-named positional args
+        if api_key is None and "api_key" in alias_values:
+            api_key = alias_values["api_key"]
+
+        if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.…
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
-            self.…
+            self.api_key = get_settings().OPENAI_API_KEY
 
-        …
-        if …
-            raise …
+        model = model if model else default_openai_embedding_model
+        if model not in valid_openai_embedding_models:
+            raise DeepEvalError(
                 f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
             )
-        self.…
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
+        super().__init__(model)
 
     @retry_openai
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=text, model=self.…
+            input=text, model=self.name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -57,7 +77,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=texts, model=self.…
+            input=texts, model=self.name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -65,7 +85,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=text, model=self.…
+            input=text, model=self.name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -73,7 +93,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=texts, model=self.…
+            input=texts, model=self.name, **self.generation_kwargs
        )
         return [item.embedding for item in response.data]
 
@@ -81,28 +101,25 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     # Model
     ###############################################
 
-    def get_model_name(self):
-        return self.model_name
-
     def load_model(self, async_mode: bool = False):
         if not async_mode:
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
     def _build_client(self, cls):
-        …
-        self.…
+        api_key = require_secret_api_key(
+            self.api_key,
             provider_label="OpenAI",
             env_var_name="OPENAI_API_KEY",
-            param_hint="`…
+            param_hint="`api_key` to OpenAIEmbeddingModel(...)",
         )
 
-        client_kwargs = self.…
+        client_kwargs = self.kwargs.copy()
         if not sdk_retries_for(PS.OPENAI):
            client_kwargs["max_retries"] = 0
 
         client_init_kwargs = dict(
-            api_key=…
+            api_key=api_key,
             **client_kwargs,
         )
         try:
@@ -113,3 +130,6 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
             client_init_kwargs.pop("max_retries", None)
             return cls(**client_init_kwargs)
         raise
+
+    def get_model_name(self):
+        return f"{self.name} (OpenAI)"
```
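Taken together, these hunks rename the explicit key parameter to `api_key` (with the deprecated `openai_api_key` keyword still accepted via `_ALIAS_MAP`), store the key as a `SecretStr`, validate the model name eagerly, and hand the model name to the base class via `super().__init__(model)`. A minimal usage sketch, assuming only what the hunks show and that the base class exposes the model as `self.name`; the key values are placeholders:

```python
from deepeval.models.embedding_models.openai_embedding_model import (
    OpenAIEmbeddingModel,
)

# New-style call: `api_key` is the renamed parameter; when `model` is omitted
# it falls back to default_openai_embedding_model ("text-embedding-3-small").
embedder = OpenAIEmbeddingModel(
    model="text-embedding-3-small",
    api_key="sk-placeholder",  # placeholder value
)

# Old-style call: the deprecated `openai_api_key` keyword is remapped to
# `api_key` through _ALIAS_MAP, so existing callers keep working.
legacy = OpenAIEmbeddingModel(openai_api_key="sk-placeholder")

# get_model_name() now appends the provider label.
print(embedder.get_model_name())  # -> "text-embedding-3-small (OpenAI)"
```

The embedding calls themselves now reference `self.name` instead of a local `model_name` attribute and forward `self.generation_kwargs` to `client.embeddings.create`.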
deepeval/models/llms/amazon_bedrock_model.py

```diff
@@ -1,131 +1,285 @@
-import …
-
-from typing import Optional, Tuple, Union, Dict
+import base64
+from typing import Optional, Tuple, Union, Dict, List
 from contextlib import AsyncExitStack
-from pydantic import BaseModel
 
+from pydantic import BaseModel, SecretStr
+
+from deepeval.config.settings import get_settings
+from deepeval.utils import (
+    require_dependency,
+    require_param,
+)
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
+from deepeval.test_case import MLLMImage
+from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
 from deepeval.models import DeepEvalBaseLLM
+from deepeval.models.llms.constants import BEDROCK_MODELS_DATA
 from deepeval.models.llms.utils import trim_and_load_json, safe_asyncio_run
 from deepeval.constants import ProviderSlug as PS
+from deepeval.models.utils import (
+    require_costs,
+    normalize_kwargs_and_extract_aliases,
+)
 
-# check aiobotocore availability
-try:
-    from aiobotocore.session import get_session
-    from botocore.config import Config
-
-    aiobotocore_available = True
-except ImportError:
-    aiobotocore_available = False
 
-# define retry policy
 retry_bedrock = create_retry_decorator(PS.BEDROCK)
 
-…
-    "Install them via your package manager (e.g. pip install aiobotocore botocore)"
-)
+_ALIAS_MAP = {
+    "model": ["model_id"],
+    "cost_per_input_token": ["input_token_cost"],
+    "cost_per_output_token": ["output_token_cost"],
+}
 
 
 class AmazonBedrockModel(DeepEvalBaseLLM):
     def __init__(
         self,
-        …
-        region_name: str,
+        model: Optional[str] = None,
         aws_access_key_id: Optional[str] = None,
         aws_secret_access_key: Optional[str] = None,
-        …
+        cost_per_input_token: Optional[float] = None,
+        cost_per_output_token: Optional[float] = None,
+        region: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        …
+        settings = get_settings()
+
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "AmazonBedrockModel",
+            kwargs,
+            _ALIAS_MAP,
+        )
+
+        # Backwards compatibility for renamed params
+        if model is None and "model" in alias_values:
+            model = alias_values["model"]
+        if (
+            cost_per_input_token is None
+            and "cost_per_input_token" in alias_values
+        ):
+            cost_per_input_token = alias_values["cost_per_input_token"]
+        if (
+            cost_per_output_token is None
+            and "cost_per_output_token" in alias_values
+        ):
+            cost_per_output_token = alias_values["cost_per_output_token"]
+
+        # Secrets: prefer explicit args -> settings -> then AWS default chain
+        if aws_access_key_id is not None:
+            self.aws_access_key_id: Optional[SecretStr] = SecretStr(
+                aws_access_key_id
+            )
+        else:
+            self.aws_access_key_id = settings.AWS_ACCESS_KEY_ID
+
+        if aws_secret_access_key is not None:
+            self.aws_secret_access_key: Optional[SecretStr] = SecretStr(
+                aws_secret_access_key
+            )
+        else:
+            self.aws_secret_access_key = settings.AWS_SECRET_ACCESS_KEY
+
+        # Dependencies: aiobotocore & botocore
+        aiobotocore_session = require_dependency(
+            "aiobotocore.session",
+            provider_label="AmazonBedrockModel",
+            install_hint="Install it with `pip install aiobotocore`.",
+        )
+        self.botocore_module = require_dependency(
+            "botocore",
+            provider_label="AmazonBedrockModel",
+            install_hint="Install it with `pip install botocore`.",
+        )
+        self._session = aiobotocore_session.get_session()
         self._exit_stack = AsyncExitStack()
-        …
+
+        # Defaults from settings
+        model = model or settings.AWS_BEDROCK_MODEL_NAME
+        region = region or settings.AWS_BEDROCK_REGION
+
+        cost_per_input_token = (
+            cost_per_input_token
+            if cost_per_input_token is not None
+            else settings.AWS_BEDROCK_COST_PER_INPUT_TOKEN
+        )
+        cost_per_output_token = (
+            cost_per_output_token
+            if cost_per_output_token is not None
+            else settings.AWS_BEDROCK_COST_PER_OUTPUT_TOKEN
+        )
+
+        # Required params
+        model = require_param(
+            model,
+            provider_label="AmazonBedrockModel",
+            env_var_name="AWS_BEDROCK_MODEL_NAME",
+            param_hint="model",
+        )
+        region = require_param(
+            region,
+            provider_label="AmazonBedrockModel",
+            env_var_name="AWS_BEDROCK_REGION",
+            param_hint="region",
+        )
+
+        self.model_data = BEDROCK_MODELS_DATA.get(model)
+        cost_per_input_token, cost_per_output_token = require_costs(
+            self.model_data,
+            model,
+            "AWS_BEDROCK_COST_PER_INPUT_TOKEN",
+            "AWS_BEDROCK_COST_PER_OUTPUT_TOKEN",
+            cost_per_input_token,
+            cost_per_output_token,
+        )
+
+        # Final attributes
+        self.region = region
+        self.cost_per_input_token = float(cost_per_input_token or 0.0)
+        self.cost_per_output_token = float(cost_per_output_token or 0.0)
+
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
         self._client = None
         self._sdk_retry_mode: Optional[bool] = None
 
+        super().__init__(model)
+
     ###############################################
     # Generate functions
     ###############################################
 
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, …
+    ) -> Tuple[Union[str, BaseModel], float]:
         return safe_asyncio_run(self.a_generate(prompt, schema))
 
     @retry_bedrock
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, …
-        …
+    ) -> Tuple[Union[str, BaseModel], float]:
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            payload = self.generate_payload(prompt)
+        else:
             payload = self.get_converse_request_body(prompt)
-        …
+
+        payload = self.get_converse_request_body(prompt)
+        client = await self._ensure_client()
+        response = await client.converse(
+            modelId=self.get_model_name(),
+            messages=payload["messages"],
+            inferenceConfig=payload["inferenceConfig"],
+        )
+        message = response["output"]["message"]["content"][0]["text"]
+        cost = self.calculate_cost(
+            response["usage"]["inputTokens"],
+            response["usage"]["outputTokens"],
+        )
+        if schema is None:
+            return message, cost
+        else:
+            json_output = trim_and_load_json(message)
+            return schema.model_validate(json_output), cost
+
+    def generate_payload(
+        self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
+    ):
+        multimodal_input = [] if multimodal_input is None else multimodal_input
+        content = []
+        for element in multimodal_input:
+            if isinstance(element, str):
+                content.append({"text": element})
+            elif isinstance(element, MLLMImage):
+                # Bedrock doesn't support external URLs - must convert everything to bytes
+                element.ensure_images_loaded()
+
+                image_format = (
+                    (element.mimeType or "image/jpeg").split("/")[-1].upper()
+                )
+                image_format = "JPEG" if image_format == "JPG" else image_format
+
+                try:
+                    image_raw_bytes = base64.b64decode(element.dataBase64)
+                except Exception:
+                    raise ValueError(
+                        f"Invalid base64 data in MLLMImage: {element._id}"
+                    )
+
+                content.append(
+                    {
+                        "image": {
+                            "format": image_format,
+                            "source": {"bytes": image_raw_bytes},
+                        }
+                    }
+                )
+
+        return {
+            "messages": [{"role": "user", "content": content}],
+            "inferenceConfig": {
+                **self.generation_kwargs,
+            },
+        }
+
+    #########################
+    # Capabilities #
+    #########################
+
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
+
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        return self.model_data.supports_json
 
     ###############################################
     # Client management
     ###############################################
 
     async def _ensure_client(self):
+
         use_sdk = sdk_retries_for(PS.BEDROCK)
 
         # only rebuild if client is missing or the sdk retry mode changes
         if self._client is None or self._sdk_retry_mode != use_sdk:
-            # Close any previous
-            if self._client is not None:
-                await self._exit_stack.aclose()
-                self._client = None
 
             # create retry config for botocore
             retries_config = {"max_attempts": (5 if use_sdk else 1)}
             if use_sdk:
                 retries_config["mode"] = "adaptive"
 
+            Config = self.botocore_module.config.Config
             config = Config(retries=retries_config)
 
-            …
-                aws_access_key_id=self.aws_access_key_id,
-                aws_secret_access_key=self.aws_secret_access_key,
-                config=config,
+            client_kwargs = {
+                "region_name": self.region,
+                "config": config,
                 **self.kwargs,
-            …
+            }
+
+            if self.aws_access_key_id is not None:
+                client_kwargs["aws_access_key_id"] = (
+                    self.aws_access_key_id.get_secret_value()
+                )
+            if self.aws_secret_access_key is not None:
+                client_kwargs["aws_secret_access_key"] = (
+                    self.aws_secret_access_key.get_secret_value()
+                )
+
+            cm = self._session.create_client("bedrock-runtime", **client_kwargs)
+
             self._client = await self._exit_stack.enter_async_context(cm)
             self._sdk_retry_mode = use_sdk
 
@@ -150,12 +304,12 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
 
     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
         return (
-            input_tokens * self.…
-            + output_tokens * self.…
+            input_tokens * self.cost_per_input_token
+            + output_tokens * self.cost_per_output_token
        )
 
     def load_model(self):
         pass
 
     def get_model_name(self) -> str:
-        return self.…
+        return self.name
```