deepeval 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- deepeval/_version.py +1 -1
- deepeval/dataset/golden.py +54 -2
- deepeval/evaluate/evaluate.py +16 -8
- deepeval/evaluate/execute.py +70 -26
- deepeval/evaluate/utils.py +26 -22
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/metrics/__init__.py +14 -12
- deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
- deepeval/metrics/answer_relevancy/template.py +188 -92
- deepeval/metrics/base_metric.py +2 -5
- deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/dag/templates.py +2 -2
- deepeval/metrics/faithfulness/faithfulness.py +70 -27
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/utils.py +2 -2
- deepeval/metrics/indicator.py +4 -4
- deepeval/metrics/multimodal_metrics/__init__.py +0 -18
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
- deepeval/metrics/utils.py +39 -58
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +16 -38
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +52 -28
- deepeval/models/embedding_models/local_embedding_model.py +18 -14
- deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
- deepeval/models/embedding_models/openai_embedding_model.py +40 -21
- deepeval/models/llms/amazon_bedrock_model.py +1 -2
- deepeval/models/llms/anthropic_model.py +44 -23
- deepeval/models/llms/azure_model.py +121 -36
- deepeval/models/llms/deepseek_model.py +18 -13
- deepeval/models/llms/gemini_model.py +129 -43
- deepeval/models/llms/grok_model.py +18 -13
- deepeval/models/llms/kimi_model.py +18 -13
- deepeval/models/llms/litellm_model.py +42 -22
- deepeval/models/llms/local_model.py +12 -7
- deepeval/models/llms/ollama_model.py +114 -12
- deepeval/models/llms/openai_model.py +137 -41
- deepeval/models/llms/portkey_model.py +24 -7
- deepeval/models/llms/utils.py +5 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +46 -1
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
- deepeval/{optimization → optimizer}/configs.py +5 -8
- deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/{optimization → optimizer}/utils.py +47 -165
- deepeval/prompt/prompt.py +5 -9
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +12 -10
- deepeval/test_case/conversational_test_case.py +19 -1
- deepeval/test_case/llm_test_case.py +152 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +15 -14
- deepeval/test_run/test_run.py +3 -3
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +65 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/RECORD +116 -125
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -343
- deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval/models/mlllms/ollama_model.py +0 -175
- deepeval/models/mlllms/openai_model.py +0 -309
- deepeval/optimization/__init__.py +0 -13
- deepeval/optimization/adapters/__init__.py +0 -2
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval/optimization/aggregates.py +0 -14
- deepeval/optimization/copro/configs.py +0 -31
- deepeval/optimization/gepa/__init__.py +0 -7
- deepeval/optimization/gepa/configs.py +0 -115
- deepeval/optimization/miprov2/configs.py +0 -134
- deepeval/optimization/miprov2/loop.py +0 -785
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval/optimization/policies/__init__.py +0 -16
- deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +0 -33
- deepeval/optimization/types.py +0 -361
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
(Reconstructed unified diffs follow. Removed lines whose content the diff viewer truncated are marked with "…".)

--- deepeval/models/embedding_models/ollama_embedding_model.py (3.7.4)
+++ deepeval/models/embedding_models/ollama_embedding_model.py (3.7.5)
@@ -1,8 +1,11 @@
-from ollama import Client, AsyncClient
 from typing import List, Optional, Dict
 
 from deepeval.config.settings import get_settings
+from deepeval.utils import require_dependency
 from deepeval.models import DeepEvalBaseEmbeddingModel
+from deepeval.models.utils import (
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
@@ -11,32 +14,45 @@ from deepeval.constants import ProviderSlug as PS
 
 retry_ollama = create_retry_decorator(PS.OLLAMA)
 
+_ALIAS_MAP = {"base_url": ["host"]}
+
 
 class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def __init__(
         self,
         model: Optional[str] = None,
-        …
+        base_url: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
-        **…
+        **kwargs,
     ):
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "OllamaEmbeddingModel",
+            kwargs,
+            _ALIAS_MAP,
+        )
+
+        # re-map depricated keywords to re-named positional args
+        if base_url is None and "base_url" in alias_values:
+            base_url = alias_values["base_url"]
+
        settings = get_settings()
 
-        self.…
-        …
+        self.base_url = (
+            base_url
             or settings.LOCAL_EMBEDDING_BASE_URL
             and str(settings.LOCAL_EMBEDDING_BASE_URL)
         )
-        …
-        …
+        model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(…
+        super().__init__(model)
 
     @retry_ollama
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.…
+            model=self.name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]
 
@@ -44,7 +60,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embed(
-            model=self.…
+            model=self.name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]
 
@@ -52,7 +68,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.…
+            model=self.name, input=text, **self.generation_kwargs
         )
         return response["embeddings"][0]
 
@@ -60,7 +76,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embed(
-            model=self.…
+            model=self.name, input=texts, **self.generation_kwargs
         )
         return response["embeddings"]
 
@@ -69,12 +85,18 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
     ###############################################
 
     def load_model(self, async_mode: bool = False):
+        ollama = require_dependency(
+            "ollama",
+            provider_label="OllamaEmbeddingModel",
+            install_hint="Install it with `pip install ollama`.",
+        )
+
         if not async_mode:
-            return self._build_client(Client)
-        return self._build_client(AsyncClient)
+            return self._build_client(ollama.Client)
+        return self._build_client(ollama.AsyncClient)
 
     def _build_client(self, cls):
-        return cls(host=self.…
+        return cls(host=self.base_url, **self.kwargs)
 
     def get_model_name(self):
-        return f"{self.…
+        return f"{self.name} (Ollama)"
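
The change above renames `OllamaEmbeddingModel`'s old connection keyword to `base_url` (keeping `host` as a deprecated alias via `_ALIAS_MAP`) and defers the `ollama` import to `load_model()`. A minimal usage sketch of the new surface — the model name and endpoint below are illustrative placeholders, not values taken from this diff:

    from deepeval.models.embedding_models.ollama_embedding_model import (
        OllamaEmbeddingModel,
    )

    # 3.7.5 keyword: base_url (falls back to the LOCAL_EMBEDDING_BASE_URL setting).
    embedder = OllamaEmbeddingModel(
        model="nomic-embed-text",           # placeholder model name
        base_url="http://localhost:11434",  # placeholder endpoint
    )

    # The deprecated keyword still works: _ALIAS_MAP = {"base_url": ["host"]}
    # routes host= through normalize_kwargs_and_extract_aliases() onto base_url.
    legacy = OllamaEmbeddingModel(
        model="nomic-embed-text",
        host="http://localhost:11434",
    )

    vector = embedder.embed_text("hello world")  # -> List[float]
    vectors = embedder.embed_texts(["a", "b"])   # -> List[List[float]]
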
--- deepeval/models/embedding_models/openai_embedding_model.py (3.7.4)
+++ deepeval/models/embedding_models/openai_embedding_model.py (3.7.5)
@@ -3,7 +3,10 @@ from openai import OpenAI, AsyncOpenAI
 from pydantic import SecretStr
 
 from deepeval.config.settings import get_settings
-from deepeval.models.utils import …
+from deepeval.models.utils import (
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.models import DeepEvalBaseEmbeddingModel
 from deepeval.models.retry_policy import (
     create_retry_decorator,
@@ -19,37 +22,53 @@ valid_openai_embedding_models = [
     "text-embedding-3-large",
     "text-embedding-ada-002",
 ]
+
 default_openai_embedding_model = "text-embedding-3-small"
 
+_ALIAS_MAP = {
+    "api_key": ["openai_api_key"],
+}
+
 
 class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
 
     def __init__(
         self,
         model: Optional[str] = None,
-        …
+        api_key: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
-        **…
+        **kwargs,
     ):
-        …
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "OpenAIEmbeddingModel",
+            kwargs,
+            _ALIAS_MAP,
+        )
+
+        # re-map depricated keywords to re-named positional args
+        if api_key is None and "api_key" in alias_values:
+            api_key = alias_values["api_key"]
+
+        if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.…
+            self.api_key: SecretStr | None = SecretStr(api_key)
         else:
-            self.…
+            self.api_key = get_settings().OPENAI_API_KEY
 
-        …
-        if …
+        model = model if model else default_openai_embedding_model
+        if model not in valid_openai_embedding_models:
             raise ValueError(
                 f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
             )
-        self.…
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
+        super().__init__(model)
 
     @retry_openai
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=text, model=self.…
+            input=text, model=self.name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -57,7 +76,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=texts, model=self.…
+            input=texts, model=self.name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -65,7 +84,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=text, model=self.…
+            input=text, model=self.name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -73,7 +92,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=texts, model=self.…
+            input=texts, model=self.name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -81,28 +100,25 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     # Model
     ###############################################
 
-    def get_model_name(self):
-        return self.model_name
-
     def load_model(self, async_mode: bool = False):
         if not async_mode:
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
     def _build_client(self, cls):
-        …
-        self.…
+        api_key = require_secret_api_key(
+            self.api_key,
             provider_label="OpenAI",
             env_var_name="OPENAI_API_KEY",
-            param_hint="`…
+            param_hint="`api_key` to OpenAIEmbeddingModel(...)",
         )
 
-        client_kwargs = self.…
+        client_kwargs = self.kwargs.copy()
         if not sdk_retries_for(PS.OPENAI):
             client_kwargs["max_retries"] = 0
 
         client_init_kwargs = dict(
-            api_key=…
+            api_key=api_key,
             **client_kwargs,
         )
         try:
@@ -113,3 +129,6 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
                 client_init_kwargs.pop("max_retries", None)
                 return cls(**client_init_kwargs)
             raise
+
+    def get_model_name(self):
+        return f"{self.name} (OpenAI)"
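
`OpenAIEmbeddingModel` gets the same treatment: a public `api_key` parameter (stored as a pydantic `SecretStr`), a deprecated `openai_api_key` alias, and a `get_model_name()` that now tags the provider. A minimal sketch — the key value is a placeholder:

    from deepeval.models.embedding_models.openai_embedding_model import (
        OpenAIEmbeddingModel,
    )

    # New keyword; wrapped in SecretStr to keep it out of logs and serialization.
    embedder = OpenAIEmbeddingModel(
        model="text-embedding-3-small",  # also the default
        api_key="sk-placeholder",        # placeholder key
    )

    # Deprecated alias, remapped via _ALIAS_MAP = {"api_key": ["openai_api_key"]}.
    legacy = OpenAIEmbeddingModel(openai_api_key="sk-placeholder")

    print(embedder.get_model_name())  # "text-embedding-3-small (OpenAI)"
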
--- deepeval/models/llms/amazon_bedrock_model.py (3.7.4)
+++ deepeval/models/llms/amazon_bedrock_model.py (3.7.5)
@@ -1,5 +1,3 @@
-import asyncio
-
 from typing import Optional, Tuple, Union, Dict
 from contextlib import AsyncExitStack
 from pydantic import BaseModel
@@ -76,6 +74,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         try:
             payload = self.get_converse_request_body(prompt)
             client = await self._ensure_client()
--- deepeval/models/llms/anthropic_model.py (3.7.4)
+++ deepeval/models/llms/anthropic_model.py (3.7.5)
@@ -1,7 +1,6 @@
 import warnings
 
 from typing import Optional, Tuple, Union, Dict
-from anthropic import Anthropic, AsyncAnthropic
 from pydantic import BaseModel, SecretStr
 
 from deepeval.models import DeepEvalBaseLLM
@@ -10,10 +9,13 @@ from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
-from deepeval.models.utils import …
+from deepeval.models.utils import (
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.config.settings import get_settings
 from deepeval.constants import ProviderSlug as PS
-
+from deepeval.utils import require_dependency
 
 # consistent retry rules
 retry_anthropic = create_retry_decorator(PS.ANTHROPIC)
@@ -30,33 +32,44 @@ model_pricing = {
     "claude-instant-1.2": {"input": 0.80 / 1e6, "output": 2.40 / 1e6},
 }
 
+_ALIAS_MAP = {
+    "api_key": ["_anthropic_api_key"],
+}
+
 
 class AnthropicModel(DeepEvalBaseLLM):
     def __init__(
         self,
         model: str = "claude-3-7-sonnet-latest",
+        api_key: Optional[str] = None,
         temperature: float = 0,
-        _anthropic_api_key: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        …
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "AnthropicModel",
+            kwargs,
+            _ALIAS_MAP,
+        )
+
+        # re-map depricated keywords to re-named positional args
+        if api_key is None and "api_key" in alias_values:
+            api_key = alias_values["api_key"]
 
-        if …
+        if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.…
-                _anthropic_api_key…
-            )
+            self.api_key: SecretStr | None = SecretStr(api_key)
         else:
-            self.…
+            self.api_key = get_settings().ANTHROPIC_API_KEY
 
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
 
-        …
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(…
+        super().__init__(model)
 
     ###############################################
     # Generate functions
@@ -66,6 +79,7 @@ class AnthropicModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         chat_model = self.load_model()
         message = chat_model.messages.create(
             max_tokens=1024,
@@ -75,7 +89,7 @@ class AnthropicModel(DeepEvalBaseLLM):
                     "content": prompt,
                 }
             ],
-            model=self.…
+            model=self.name,
             temperature=self.temperature,
             **self.generation_kwargs,
         )
@@ -92,6 +106,7 @@ class AnthropicModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[str, float]:
+
         chat_model = self.load_model(async_mode=True)
         message = await chat_model.messages.create(
             max_tokens=1024,
@@ -101,7 +116,7 @@ class AnthropicModel(DeepEvalBaseLLM):
                     "content": prompt,
                 }
             ],
-            model=self.…
+            model=self.name,
             temperature=self.temperature,
             **self.generation_kwargs,
         )
@@ -120,7 +135,7 @@ class AnthropicModel(DeepEvalBaseLLM):
     ###############################################
 
     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
-        pricing = model_pricing.get(self.…
+        pricing = model_pricing.get(self.name)
 
         if pricing is None:
             # Calculate average cost from all known models
@@ -133,7 +148,7 @@ class AnthropicModel(DeepEvalBaseLLM):
             pricing = {"input": avg_input_cost, "output": avg_output_cost}
 
             warnings.warn(
-                f"[Warning] Pricing not defined for model '{self.…
+                f"[Warning] Pricing not defined for model '{self.name}'. "
                 "Using average input/output token costs from existing model_pricing."
             )
 
@@ -146,12 +161,15 @@ class AnthropicModel(DeepEvalBaseLLM):
     ###############################################
 
     def load_model(self, async_mode: bool = False):
-        …
-        …
-        …
+        module = require_dependency(
+            "anthropic",
+            provider_label="AnthropicModel",
+            install_hint="Install it with `pip install anthropic`.",
+        )
 
-        …
-        …
+        if not async_mode:
+            return self._build_client(module.Anthropic)
+        return self._build_client(module.AsyncAnthropic)
 
     def _client_kwargs(self) -> Dict:
         kwargs = dict(self.kwargs or {})
@@ -163,10 +181,10 @@ class AnthropicModel(DeepEvalBaseLLM):
 
     def _build_client(self, cls):
         api_key = require_secret_api_key(
-            self.…
+            self.api_key,
             provider_label="Anthropic",
             env_var_name="ANTHROPIC_API_KEY",
-            param_hint="`…
+            param_hint="`api_key` to AnthropicModel(...)",
        )
         kw = dict(
             api_key=api_key,
@@ -180,3 +198,6 @@ class AnthropicModel(DeepEvalBaseLLM):
                 kw.pop("max_retries", None)
                 return cls(**kw)
             raise
+
+    def get_model_name(self):
+        return f"{self.name} (Anthropic)"
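
`AnthropicModel` follows the same pattern: the private-looking `_anthropic_api_key` parameter becomes `api_key`, and the `anthropic` SDK is now imported lazily inside `load_model()` via `require_dependency`, so the wrapper can be constructed without the SDK installed. A minimal sketch — the key value is a placeholder:

    from deepeval.models.llms.anthropic_model import AnthropicModel

    # 3.7.5: api_key is a first-class keyword, stored as a SecretStr.
    model = AnthropicModel(
        model="claude-3-7-sonnet-latest",  # the default shown in the diff
        api_key="sk-ant-placeholder",      # placeholder key
    )

    # The old keyword is still accepted and remapped through
    # _ALIAS_MAP = {"api_key": ["_anthropic_api_key"]}.
    legacy = AnthropicModel(_anthropic_api_key="sk-ant-placeholder")

    # The anthropic package is only required once load_model() runs,
    # i.e. when generate()/a_generate() is called.
    print(model.get_model_name())  # "claude-3-7-sonnet-latest (Anthropic)"
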