deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/test.py +1 -1
- deepeval/config/settings.py +102 -13
- deepeval/dataset/golden.py +54 -2
- deepeval/evaluate/configs.py +1 -1
- deepeval/evaluate/evaluate.py +16 -8
- deepeval/evaluate/execute.py +74 -27
- deepeval/evaluate/utils.py +26 -22
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/metrics/__init__.py +14 -12
- deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
- deepeval/metrics/answer_relevancy/template.py +188 -92
- deepeval/metrics/argument_correctness/template.py +2 -2
- deepeval/metrics/base_metric.py +2 -5
- deepeval/metrics/bias/template.py +3 -3
- deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/template.py +2 -2
- deepeval/metrics/conversational_dag/templates.py +4 -4
- deepeval/metrics/conversational_g_eval/template.py +4 -3
- deepeval/metrics/dag/templates.py +5 -5
- deepeval/metrics/faithfulness/faithfulness.py +70 -27
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/utils.py +2 -2
- deepeval/metrics/hallucination/template.py +4 -4
- deepeval/metrics/indicator.py +4 -4
- deepeval/metrics/misuse/template.py +2 -2
- deepeval/metrics/multimodal_metrics/__init__.py +0 -18
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
- deepeval/metrics/non_advice/template.py +2 -2
- deepeval/metrics/pii_leakage/template.py +2 -2
- deepeval/metrics/prompt_alignment/template.py +4 -4
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_violation/template.py +2 -2
- deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
- deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
- deepeval/metrics/toxicity/template.py +4 -4
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
- deepeval/metrics/turn_relevancy/template.py +2 -2
- deepeval/metrics/utils.py +39 -58
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +16 -38
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +69 -32
- deepeval/models/embedding_models/local_embedding_model.py +39 -22
- deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
- deepeval/models/embedding_models/openai_embedding_model.py +50 -15
- deepeval/models/llms/amazon_bedrock_model.py +1 -2
- deepeval/models/llms/anthropic_model.py +53 -20
- deepeval/models/llms/azure_model.py +140 -43
- deepeval/models/llms/deepseek_model.py +38 -23
- deepeval/models/llms/gemini_model.py +222 -103
- deepeval/models/llms/grok_model.py +39 -27
- deepeval/models/llms/kimi_model.py +39 -23
- deepeval/models/llms/litellm_model.py +103 -45
- deepeval/models/llms/local_model.py +35 -22
- deepeval/models/llms/ollama_model.py +129 -17
- deepeval/models/llms/openai_model.py +151 -50
- deepeval/models/llms/portkey_model.py +149 -0
- deepeval/models/llms/utils.py +5 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +94 -4
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/optimizer/algorithms/copro/copro.py +836 -0
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/optimizer/algorithms/simba/simba.py +999 -0
- deepeval/optimizer/algorithms/simba/types.py +15 -0
- deepeval/optimizer/configs.py +31 -0
- deepeval/optimizer/policies.py +227 -0
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/optimizer/utils.py +480 -0
- deepeval/prompt/prompt.py +7 -6
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +12 -10
- deepeval/test_case/conversational_test_case.py +19 -1
- deepeval/test_case/llm_test_case.py +152 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +15 -14
- deepeval/test_run/cache.py +2 -0
- deepeval/test_run/test_run.py +9 -4
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +89 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -334
- deepeval/models/mlllms/gemini_model.py +0 -284
- deepeval/models/mlllms/ollama_model.py +0 -144
- deepeval/models/mlllms/openai_model.py +0 -258
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
from ollama import Client, AsyncClient
|
|
2
1
|
from typing import List, Optional, Dict
|
|
3
2
|
|
|
4
|
-
from deepeval.
|
|
3
|
+
from deepeval.config.settings import get_settings
|
|
4
|
+
from deepeval.utils import require_dependency
|
|
5
5
|
from deepeval.models import DeepEvalBaseEmbeddingModel
|
|
6
|
+
from deepeval.models.utils import (
|
|
7
|
+
normalize_kwargs_and_extract_aliases,
|
|
8
|
+
)
|
|
6
9
|
from deepeval.models.retry_policy import (
|
|
7
10
|
create_retry_decorator,
|
|
8
11
|
)
|
|
@@ -11,30 +14,45 @@ from deepeval.constants import ProviderSlug as PS
|
|
|
11
14
|
|
|
12
15
|
retry_ollama = create_retry_decorator(PS.OLLAMA)
|
|
13
16
|
|
|
17
|
+
_ALIAS_MAP = {"base_url": ["host"]}
|
|
18
|
+
|
|
14
19
|
|
|
15
20
|
class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
16
21
|
def __init__(
|
|
17
22
|
self,
|
|
18
23
|
model: Optional[str] = None,
|
|
19
|
-
|
|
24
|
+
base_url: Optional[str] = None,
|
|
20
25
|
generation_kwargs: Optional[Dict] = None,
|
|
21
|
-
**
|
|
26
|
+
**kwargs,
|
|
22
27
|
):
|
|
23
|
-
|
|
24
|
-
|
|
28
|
+
normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
|
|
29
|
+
"OllamaEmbeddingModel",
|
|
30
|
+
kwargs,
|
|
31
|
+
_ALIAS_MAP,
|
|
25
32
|
)
|
|
26
|
-
|
|
27
|
-
|
|
33
|
+
|
|
34
|
+
# re-map deprecated keywords to renamed positional args
|
|
35
|
+
if base_url is None and "base_url" in alias_values:
|
|
36
|
+
base_url = alias_values["base_url"]
|
|
37
|
+
|
|
38
|
+
settings = get_settings()
|
|
39
|
+
|
|
40
|
+
self.base_url = (
|
|
41
|
+
base_url
|
|
42
|
+
or settings.LOCAL_EMBEDDING_BASE_URL
|
|
43
|
+
and str(settings.LOCAL_EMBEDDING_BASE_URL)
|
|
28
44
|
)
|
|
29
|
-
|
|
45
|
+
model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
|
|
46
|
+
# Keep sanitized kwargs for client call to strip legacy keys
|
|
47
|
+
self.kwargs = normalized_kwargs
|
|
30
48
|
self.generation_kwargs = generation_kwargs or {}
|
|
31
|
-
super().__init__(
|
|
49
|
+
super().__init__(model)
|
|
32
50
|
|
|
33
51
|
@retry_ollama
|
|
34
52
|
def embed_text(self, text: str) -> List[float]:
|
|
35
53
|
embedding_model = self.load_model()
|
|
36
54
|
response = embedding_model.embed(
|
|
37
|
-
model=self.
|
|
55
|
+
model=self.name, input=text, **self.generation_kwargs
|
|
38
56
|
)
|
|
39
57
|
return response["embeddings"][0]
|
|
40
58
|
|
|
@@ -42,7 +60,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
42
60
|
def embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
43
61
|
embedding_model = self.load_model()
|
|
44
62
|
response = embedding_model.embed(
|
|
45
|
-
model=self.
|
|
63
|
+
model=self.name, input=texts, **self.generation_kwargs
|
|
46
64
|
)
|
|
47
65
|
return response["embeddings"]
|
|
48
66
|
|
|
@@ -50,7 +68,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
50
68
|
async def a_embed_text(self, text: str) -> List[float]:
|
|
51
69
|
embedding_model = self.load_model(async_mode=True)
|
|
52
70
|
response = await embedding_model.embed(
|
|
53
|
-
model=self.
|
|
71
|
+
model=self.name, input=text, **self.generation_kwargs
|
|
54
72
|
)
|
|
55
73
|
return response["embeddings"][0]
|
|
56
74
|
|
|
@@ -58,7 +76,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
58
76
|
async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
59
77
|
embedding_model = self.load_model(async_mode=True)
|
|
60
78
|
response = await embedding_model.embed(
|
|
61
|
-
model=self.
|
|
79
|
+
model=self.name, input=texts, **self.generation_kwargs
|
|
62
80
|
)
|
|
63
81
|
return response["embeddings"]
|
|
64
82
|
|
|
@@ -67,12 +85,18 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
67
85
|
###############################################
|
|
68
86
|
|
|
69
87
|
def load_model(self, async_mode: bool = False):
|
|
88
|
+
ollama = require_dependency(
|
|
89
|
+
"ollama",
|
|
90
|
+
provider_label="OllamaEmbeddingModel",
|
|
91
|
+
install_hint="Install it with `pip install ollama`.",
|
|
92
|
+
)
|
|
93
|
+
|
|
70
94
|
if not async_mode:
|
|
71
|
-
return self._build_client(Client)
|
|
72
|
-
return self._build_client(AsyncClient)
|
|
95
|
+
return self._build_client(ollama.Client)
|
|
96
|
+
return self._build_client(ollama.AsyncClient)
|
|
73
97
|
|
|
74
98
|
def _build_client(self, cls):
|
|
75
|
-
return cls(host=self.
|
|
99
|
+
return cls(host=self.base_url, **self.kwargs)
|
|
76
100
|
|
|
77
101
|
def get_model_name(self):
|
|
78
|
-
return f"{self.
|
|
102
|
+
return f"{self.name} (Ollama)"
|
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
from typing import Dict, Optional, List
|
|
2
2
|
from openai import OpenAI, AsyncOpenAI
|
|
3
|
+
from pydantic import SecretStr
|
|
4
|
+
|
|
5
|
+
from deepeval.config.settings import get_settings
|
|
6
|
+
from deepeval.models.utils import (
|
|
7
|
+
require_secret_api_key,
|
|
8
|
+
normalize_kwargs_and_extract_aliases,
|
|
9
|
+
)
|
|
3
10
|
from deepeval.models import DeepEvalBaseEmbeddingModel
|
|
4
11
|
from deepeval.models.retry_policy import (
|
|
5
12
|
create_retry_decorator,
|
|
@@ -15,32 +22,53 @@ valid_openai_embedding_models = [
|
|
|
15
22
|
"text-embedding-3-large",
|
|
16
23
|
"text-embedding-ada-002",
|
|
17
24
|
]
|
|
25
|
+
|
|
18
26
|
default_openai_embedding_model = "text-embedding-3-small"
|
|
19
27
|
|
|
28
|
+
_ALIAS_MAP = {
|
|
29
|
+
"api_key": ["openai_api_key"],
|
|
30
|
+
}
|
|
31
|
+
|
|
20
32
|
|
|
21
33
|
class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
22
34
|
|
|
23
35
|
def __init__(
|
|
24
36
|
self,
|
|
25
37
|
model: Optional[str] = None,
|
|
26
|
-
|
|
38
|
+
api_key: Optional[str] = None,
|
|
27
39
|
generation_kwargs: Optional[Dict] = None,
|
|
28
|
-
**
|
|
40
|
+
**kwargs,
|
|
29
41
|
):
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
42
|
+
normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
|
|
43
|
+
"OpenAIEmbeddingModel",
|
|
44
|
+
kwargs,
|
|
45
|
+
_ALIAS_MAP,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# re-map deprecated keywords to renamed positional args
|
|
49
|
+
if api_key is None and "api_key" in alias_values:
|
|
50
|
+
api_key = alias_values["api_key"]
|
|
51
|
+
|
|
52
|
+
if api_key is not None:
|
|
53
|
+
# keep it secret, keep it safe from serialization, logging and the like
|
|
54
|
+
self.api_key: SecretStr | None = SecretStr(api_key)
|
|
55
|
+
else:
|
|
56
|
+
self.api_key = get_settings().OPENAI_API_KEY
|
|
57
|
+
|
|
58
|
+
model = model if model else default_openai_embedding_model
|
|
59
|
+
if model not in valid_openai_embedding_models:
|
|
33
60
|
raise ValueError(
|
|
34
61
|
f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
|
|
35
62
|
)
|
|
36
|
-
self.
|
|
63
|
+
self.kwargs = normalized_kwargs
|
|
37
64
|
self.generation_kwargs = generation_kwargs or {}
|
|
65
|
+
super().__init__(model)
|
|
38
66
|
|
|
39
67
|
@retry_openai
|
|
40
68
|
def embed_text(self, text: str) -> List[float]:
|
|
41
69
|
client = self.load_model(async_mode=False)
|
|
42
70
|
response = client.embeddings.create(
|
|
43
|
-
input=text, model=self.
|
|
71
|
+
input=text, model=self.name, **self.generation_kwargs
|
|
44
72
|
)
|
|
45
73
|
return response.data[0].embedding
|
|
46
74
|
|
|
@@ -48,7 +76,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
48
76
|
def embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
49
77
|
client = self.load_model(async_mode=False)
|
|
50
78
|
response = client.embeddings.create(
|
|
51
|
-
input=texts, model=self.
|
|
79
|
+
input=texts, model=self.name, **self.generation_kwargs
|
|
52
80
|
)
|
|
53
81
|
return [item.embedding for item in response.data]
|
|
54
82
|
|
|
@@ -56,7 +84,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
56
84
|
async def a_embed_text(self, text: str) -> List[float]:
|
|
57
85
|
client = self.load_model(async_mode=True)
|
|
58
86
|
response = await client.embeddings.create(
|
|
59
|
-
input=text, model=self.
|
|
87
|
+
input=text, model=self.name, **self.generation_kwargs
|
|
60
88
|
)
|
|
61
89
|
return response.data[0].embedding
|
|
62
90
|
|
|
@@ -64,7 +92,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
64
92
|
async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
65
93
|
client = self.load_model(async_mode=True)
|
|
66
94
|
response = await client.embeddings.create(
|
|
67
|
-
input=texts, model=self.
|
|
95
|
+
input=texts, model=self.name, **self.generation_kwargs
|
|
68
96
|
)
|
|
69
97
|
return [item.embedding for item in response.data]
|
|
70
98
|
|
|
@@ -72,21 +100,25 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
72
100
|
# Model
|
|
73
101
|
###############################################
|
|
74
102
|
|
|
75
|
-
def get_model_name(self):
|
|
76
|
-
return self.model_name
|
|
77
|
-
|
|
78
103
|
def load_model(self, async_mode: bool = False):
|
|
79
104
|
if not async_mode:
|
|
80
105
|
return self._build_client(OpenAI)
|
|
81
106
|
return self._build_client(AsyncOpenAI)
|
|
82
107
|
|
|
83
108
|
def _build_client(self, cls):
|
|
84
|
-
|
|
109
|
+
api_key = require_secret_api_key(
|
|
110
|
+
self.api_key,
|
|
111
|
+
provider_label="OpenAI",
|
|
112
|
+
env_var_name="OPENAI_API_KEY",
|
|
113
|
+
param_hint="`api_key` to OpenAIEmbeddingModel(...)",
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
client_kwargs = self.kwargs.copy()
|
|
85
117
|
if not sdk_retries_for(PS.OPENAI):
|
|
86
118
|
client_kwargs["max_retries"] = 0
|
|
87
119
|
|
|
88
120
|
client_init_kwargs = dict(
|
|
89
|
-
api_key=
|
|
121
|
+
api_key=api_key,
|
|
90
122
|
**client_kwargs,
|
|
91
123
|
)
|
|
92
124
|
try:
|
|
@@ -97,3 +129,6 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
|
|
|
97
129
|
client_init_kwargs.pop("max_retries", None)
|
|
98
130
|
return cls(**client_init_kwargs)
|
|
99
131
|
raise
|
|
132
|
+
|
|
133
|
+
def get_model_name(self):
|
|
134
|
+
return f"{self.name} (OpenAI)"
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
|
|
3
1
|
from typing import Optional, Tuple, Union, Dict
|
|
4
2
|
from contextlib import AsyncExitStack
|
|
5
3
|
from pydantic import BaseModel
|
|
@@ -76,6 +74,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
|
|
|
76
74
|
async def a_generate(
|
|
77
75
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
78
76
|
) -> Tuple[Union[str, Dict], float]:
|
|
77
|
+
|
|
79
78
|
try:
|
|
80
79
|
payload = self.get_converse_request_body(prompt)
|
|
81
80
|
client = await self._ensure_client()
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import warnings
|
|
2
2
|
|
|
3
3
|
from typing import Optional, Tuple, Union, Dict
|
|
4
|
-
from
|
|
5
|
-
from pydantic import BaseModel
|
|
4
|
+
from pydantic import BaseModel, SecretStr
|
|
6
5
|
|
|
7
6
|
from deepeval.models import DeepEvalBaseLLM
|
|
8
7
|
from deepeval.models.llms.utils import trim_and_load_json
|
|
@@ -10,10 +9,13 @@ from deepeval.models.retry_policy import (
|
|
|
10
9
|
create_retry_decorator,
|
|
11
10
|
sdk_retries_for,
|
|
12
11
|
)
|
|
13
|
-
from deepeval.models.utils import
|
|
12
|
+
from deepeval.models.utils import (
|
|
13
|
+
require_secret_api_key,
|
|
14
|
+
normalize_kwargs_and_extract_aliases,
|
|
15
|
+
)
|
|
14
16
|
from deepeval.config.settings import get_settings
|
|
15
17
|
from deepeval.constants import ProviderSlug as PS
|
|
16
|
-
|
|
18
|
+
from deepeval.utils import require_dependency
|
|
17
19
|
|
|
18
20
|
# consistent retry rules
|
|
19
21
|
retry_anthropic = create_retry_decorator(PS.ANTHROPIC)
|
|
@@ -30,26 +32,44 @@ model_pricing = {
|
|
|
30
32
|
"claude-instant-1.2": {"input": 0.80 / 1e6, "output": 2.40 / 1e6},
|
|
31
33
|
}
|
|
32
34
|
|
|
35
|
+
_ALIAS_MAP = {
|
|
36
|
+
"api_key": ["_anthropic_api_key"],
|
|
37
|
+
}
|
|
38
|
+
|
|
33
39
|
|
|
34
40
|
class AnthropicModel(DeepEvalBaseLLM):
|
|
35
41
|
def __init__(
|
|
36
42
|
self,
|
|
37
43
|
model: str = "claude-3-7-sonnet-latest",
|
|
44
|
+
api_key: Optional[str] = None,
|
|
38
45
|
temperature: float = 0,
|
|
39
|
-
_anthropic_api_key: Optional[str] = None,
|
|
40
46
|
generation_kwargs: Optional[Dict] = None,
|
|
41
47
|
**kwargs,
|
|
42
48
|
):
|
|
43
|
-
|
|
44
|
-
|
|
49
|
+
normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
|
|
50
|
+
"AnthropicModel",
|
|
51
|
+
kwargs,
|
|
52
|
+
_ALIAS_MAP,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# re-map deprecated keywords to renamed positional args
|
|
56
|
+
if api_key is None and "api_key" in alias_values:
|
|
57
|
+
api_key = alias_values["api_key"]
|
|
58
|
+
|
|
59
|
+
if api_key is not None:
|
|
60
|
+
# keep it secret, keep it safe from serialization, logging and the like
|
|
61
|
+
self.api_key: SecretStr | None = SecretStr(api_key)
|
|
62
|
+
else:
|
|
63
|
+
self.api_key = get_settings().ANTHROPIC_API_KEY
|
|
45
64
|
|
|
46
65
|
if temperature < 0:
|
|
47
66
|
raise ValueError("Temperature must be >= 0.")
|
|
48
67
|
self.temperature = temperature
|
|
49
68
|
|
|
50
|
-
|
|
69
|
+
# Keep sanitized kwargs for client call to strip legacy keys
|
|
70
|
+
self.kwargs = normalized_kwargs
|
|
51
71
|
self.generation_kwargs = generation_kwargs or {}
|
|
52
|
-
super().__init__(
|
|
72
|
+
super().__init__(model)
|
|
53
73
|
|
|
54
74
|
###############################################
|
|
55
75
|
# Generate functions
|
|
@@ -59,6 +79,7 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
59
79
|
def generate(
|
|
60
80
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
61
81
|
) -> Tuple[Union[str, Dict], float]:
|
|
82
|
+
|
|
62
83
|
chat_model = self.load_model()
|
|
63
84
|
message = chat_model.messages.create(
|
|
64
85
|
max_tokens=1024,
|
|
@@ -68,7 +89,7 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
68
89
|
"content": prompt,
|
|
69
90
|
}
|
|
70
91
|
],
|
|
71
|
-
model=self.
|
|
92
|
+
model=self.name,
|
|
72
93
|
temperature=self.temperature,
|
|
73
94
|
**self.generation_kwargs,
|
|
74
95
|
)
|
|
@@ -85,6 +106,7 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
85
106
|
async def a_generate(
|
|
86
107
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
87
108
|
) -> Tuple[str, float]:
|
|
109
|
+
|
|
88
110
|
chat_model = self.load_model(async_mode=True)
|
|
89
111
|
message = await chat_model.messages.create(
|
|
90
112
|
max_tokens=1024,
|
|
@@ -94,7 +116,7 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
94
116
|
"content": prompt,
|
|
95
117
|
}
|
|
96
118
|
],
|
|
97
|
-
model=self.
|
|
119
|
+
model=self.name,
|
|
98
120
|
temperature=self.temperature,
|
|
99
121
|
**self.generation_kwargs,
|
|
100
122
|
)
|
|
@@ -113,7 +135,7 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
113
135
|
###############################################
|
|
114
136
|
|
|
115
137
|
def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
|
|
116
|
-
pricing = model_pricing.get(self.
|
|
138
|
+
pricing = model_pricing.get(self.name)
|
|
117
139
|
|
|
118
140
|
if pricing is None:
|
|
119
141
|
# Calculate average cost from all known models
|
|
@@ -126,7 +148,7 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
126
148
|
pricing = {"input": avg_input_cost, "output": avg_output_cost}
|
|
127
149
|
|
|
128
150
|
warnings.warn(
|
|
129
|
-
f"[Warning] Pricing not defined for model '{self.
|
|
151
|
+
f"[Warning] Pricing not defined for model '{self.name}'. "
|
|
130
152
|
"Using average input/output token costs from existing model_pricing."
|
|
131
153
|
)
|
|
132
154
|
|
|
@@ -139,12 +161,15 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
139
161
|
###############################################
|
|
140
162
|
|
|
141
163
|
def load_model(self, async_mode: bool = False):
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
164
|
+
module = require_dependency(
|
|
165
|
+
"anthropic",
|
|
166
|
+
provider_label="AnthropicModel",
|
|
167
|
+
install_hint="Install it with `pip install anthropic`.",
|
|
168
|
+
)
|
|
145
169
|
|
|
146
|
-
|
|
147
|
-
|
|
170
|
+
if not async_mode:
|
|
171
|
+
return self._build_client(module.Anthropic)
|
|
172
|
+
return self._build_client(module.AsyncAnthropic)
|
|
148
173
|
|
|
149
174
|
def _client_kwargs(self) -> Dict:
|
|
150
175
|
kwargs = dict(self.kwargs or {})
|
|
@@ -155,9 +180,14 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
155
180
|
return kwargs
|
|
156
181
|
|
|
157
182
|
def _build_client(self, cls):
|
|
158
|
-
|
|
183
|
+
api_key = require_secret_api_key(
|
|
184
|
+
self.api_key,
|
|
185
|
+
provider_label="Anthropic",
|
|
186
|
+
env_var_name="ANTHROPIC_API_KEY",
|
|
187
|
+
param_hint="`api_key` to AnthropicModel(...)",
|
|
188
|
+
)
|
|
159
189
|
kw = dict(
|
|
160
|
-
api_key=
|
|
190
|
+
api_key=api_key,
|
|
161
191
|
**self._client_kwargs(),
|
|
162
192
|
)
|
|
163
193
|
try:
|
|
@@ -168,3 +198,6 @@ class AnthropicModel(DeepEvalBaseLLM):
|
|
|
168
198
|
kw.pop("max_retries", None)
|
|
169
199
|
return cls(**kw)
|
|
170
200
|
raise
|
|
201
|
+
|
|
202
|
+
def get_model_name(self):
|
|
203
|
+
return f"{self.name} (Anthropic)"
|