deepeval 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. deepeval/_version.py +1 -1
  2. deepeval/dataset/golden.py +54 -2
  3. deepeval/evaluate/evaluate.py +16 -8
  4. deepeval/evaluate/execute.py +70 -26
  5. deepeval/evaluate/utils.py +26 -22
  6. deepeval/integrations/pydantic_ai/agent.py +19 -2
  7. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  8. deepeval/metrics/__init__.py +14 -12
  9. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  10. deepeval/metrics/answer_relevancy/template.py +188 -92
  11. deepeval/metrics/base_metric.py +2 -5
  12. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  13. deepeval/metrics/contextual_precision/template.py +115 -66
  14. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  15. deepeval/metrics/contextual_recall/template.py +106 -55
  16. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  17. deepeval/metrics/contextual_relevancy/template.py +87 -58
  18. deepeval/metrics/dag/templates.py +2 -2
  19. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  20. deepeval/metrics/faithfulness/schema.py +1 -1
  21. deepeval/metrics/faithfulness/template.py +200 -115
  22. deepeval/metrics/g_eval/utils.py +2 -2
  23. deepeval/metrics/indicator.py +4 -4
  24. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  25. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  26. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  27. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  28. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  29. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  30. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  31. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  32. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  33. deepeval/metrics/ragas.py +3 -3
  34. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  35. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  36. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  37. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  38. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  39. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  40. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  41. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  42. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  43. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  44. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  45. deepeval/metrics/turn_faithfulness/template.py +218 -0
  46. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  47. deepeval/metrics/utils.py +39 -58
  48. deepeval/models/__init__.py +0 -12
  49. deepeval/models/base_model.py +16 -38
  50. deepeval/models/embedding_models/__init__.py +7 -0
  51. deepeval/models/embedding_models/azure_embedding_model.py +52 -28
  52. deepeval/models/embedding_models/local_embedding_model.py +18 -14
  53. deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
  54. deepeval/models/embedding_models/openai_embedding_model.py +40 -21
  55. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  56. deepeval/models/llms/anthropic_model.py +44 -23
  57. deepeval/models/llms/azure_model.py +121 -36
  58. deepeval/models/llms/deepseek_model.py +18 -13
  59. deepeval/models/llms/gemini_model.py +129 -43
  60. deepeval/models/llms/grok_model.py +18 -13
  61. deepeval/models/llms/kimi_model.py +18 -13
  62. deepeval/models/llms/litellm_model.py +42 -22
  63. deepeval/models/llms/local_model.py +12 -7
  64. deepeval/models/llms/ollama_model.py +114 -12
  65. deepeval/models/llms/openai_model.py +137 -41
  66. deepeval/models/llms/portkey_model.py +24 -7
  67. deepeval/models/llms/utils.py +5 -3
  68. deepeval/models/retry_policy.py +17 -14
  69. deepeval/models/utils.py +46 -1
  70. deepeval/optimizer/__init__.py +5 -0
  71. deepeval/optimizer/algorithms/__init__.py +6 -0
  72. deepeval/optimizer/algorithms/base.py +29 -0
  73. deepeval/optimizer/algorithms/configs.py +18 -0
  74. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  75. deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
  76. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  77. deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
  78. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  79. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  80. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  81. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  82. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  83. deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
  84. deepeval/{optimization → optimizer}/configs.py +5 -8
  85. deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
  86. deepeval/optimizer/prompt_optimizer.py +263 -0
  87. deepeval/optimizer/rewriter/__init__.py +5 -0
  88. deepeval/optimizer/rewriter/rewriter.py +124 -0
  89. deepeval/optimizer/rewriter/utils.py +214 -0
  90. deepeval/optimizer/scorer/__init__.py +5 -0
  91. deepeval/optimizer/scorer/base.py +86 -0
  92. deepeval/optimizer/scorer/scorer.py +316 -0
  93. deepeval/optimizer/scorer/utils.py +30 -0
  94. deepeval/optimizer/types.py +148 -0
  95. deepeval/{optimization → optimizer}/utils.py +47 -165
  96. deepeval/prompt/prompt.py +5 -9
  97. deepeval/test_case/__init__.py +1 -3
  98. deepeval/test_case/api.py +12 -10
  99. deepeval/test_case/conversational_test_case.py +19 -1
  100. deepeval/test_case/llm_test_case.py +152 -1
  101. deepeval/test_case/utils.py +4 -8
  102. deepeval/test_run/api.py +15 -14
  103. deepeval/test_run/test_run.py +3 -3
  104. deepeval/tracing/patchers.py +9 -4
  105. deepeval/tracing/tracing.py +2 -2
  106. deepeval/utils.py +65 -0
  107. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  108. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/RECORD +116 -125
  109. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  110. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  111. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  112. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  113. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  114. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  115. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  116. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  117. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  118. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  119. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  120. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  121. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  122. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  123. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  124. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  125. deepeval/models/mlllms/__init__.py +0 -4
  126. deepeval/models/mlllms/azure_model.py +0 -343
  127. deepeval/models/mlllms/gemini_model.py +0 -313
  128. deepeval/models/mlllms/ollama_model.py +0 -175
  129. deepeval/models/mlllms/openai_model.py +0 -309
  130. deepeval/optimization/__init__.py +0 -13
  131. deepeval/optimization/adapters/__init__.py +0 -2
  132. deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  133. deepeval/optimization/aggregates.py +0 -14
  134. deepeval/optimization/copro/configs.py +0 -31
  135. deepeval/optimization/gepa/__init__.py +0 -7
  136. deepeval/optimization/gepa/configs.py +0 -115
  137. deepeval/optimization/miprov2/configs.py +0 -134
  138. deepeval/optimization/miprov2/loop.py +0 -785
  139. deepeval/optimization/mutations/__init__.py +0 -0
  140. deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  141. deepeval/optimization/policies/__init__.py +0 -16
  142. deepeval/optimization/policies/tie_breaker.py +0 -67
  143. deepeval/optimization/prompt_optimizer.py +0 -462
  144. deepeval/optimization/simba/__init__.py +0 -0
  145. deepeval/optimization/simba/configs.py +0 -33
  146. deepeval/optimization/types.py +0 -361
  147. deepeval/test_case/mllm_test_case.py +0 -170
  148. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  149. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  150. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  152. /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
  153. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  154. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  155. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
@@ -1,8 +1,11 @@
1
- from ollama import Client, AsyncClient
2
1
  from typing import List, Optional, Dict
3
2
 
4
3
  from deepeval.config.settings import get_settings
4
+ from deepeval.utils import require_dependency
5
5
  from deepeval.models import DeepEvalBaseEmbeddingModel
6
+ from deepeval.models.utils import (
7
+ normalize_kwargs_and_extract_aliases,
8
+ )
6
9
  from deepeval.models.retry_policy import (
7
10
  create_retry_decorator,
8
11
  )
@@ -11,32 +14,45 @@ from deepeval.constants import ProviderSlug as PS
11
14
 
12
15
  retry_ollama = create_retry_decorator(PS.OLLAMA)
13
16
 
17
+ _ALIAS_MAP = {"base_url": ["host"]}
18
+
14
19
 
15
20
  class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
16
21
  def __init__(
17
22
  self,
18
23
  model: Optional[str] = None,
19
- host: Optional[str] = None,
24
+ base_url: Optional[str] = None,
20
25
  generation_kwargs: Optional[Dict] = None,
21
- **client_kwargs,
26
+ **kwargs,
22
27
  ):
28
+ normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
29
+ "OllamaEmbeddingModel",
30
+ kwargs,
31
+ _ALIAS_MAP,
32
+ )
33
+
34
+ # re-map deprecated keywords to renamed positional args
35
+ if base_url is None and "base_url" in alias_values:
36
+ base_url = alias_values["base_url"]
37
+
23
38
  settings = get_settings()
24
39
 
25
- self.host = (
26
- host
40
+ self.base_url = (
41
+ base_url
27
42
  or settings.LOCAL_EMBEDDING_BASE_URL
28
43
  and str(settings.LOCAL_EMBEDDING_BASE_URL)
29
44
  )
30
- self.model_name = model or settings.LOCAL_EMBEDDING_MODEL_NAME
31
- self.client_kwargs = client_kwargs or {}
45
+ model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
46
+ # Keep sanitized kwargs for client call to strip legacy keys
47
+ self.kwargs = normalized_kwargs
32
48
  self.generation_kwargs = generation_kwargs or {}
33
- super().__init__(self.model_name)
49
+ super().__init__(model)
34
50
 
35
51
  @retry_ollama
36
52
  def embed_text(self, text: str) -> List[float]:
37
53
  embedding_model = self.load_model()
38
54
  response = embedding_model.embed(
39
- model=self.model_name, input=text, **self.generation_kwargs
55
+ model=self.name, input=text, **self.generation_kwargs
40
56
  )
41
57
  return response["embeddings"][0]
42
58
 
@@ -44,7 +60,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
44
60
  def embed_texts(self, texts: List[str]) -> List[List[float]]:
45
61
  embedding_model = self.load_model()
46
62
  response = embedding_model.embed(
47
- model=self.model_name, input=texts, **self.generation_kwargs
63
+ model=self.name, input=texts, **self.generation_kwargs
48
64
  )
49
65
  return response["embeddings"]
50
66
 
@@ -52,7 +68,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
52
68
  async def a_embed_text(self, text: str) -> List[float]:
53
69
  embedding_model = self.load_model(async_mode=True)
54
70
  response = await embedding_model.embed(
55
- model=self.model_name, input=text, **self.generation_kwargs
71
+ model=self.name, input=text, **self.generation_kwargs
56
72
  )
57
73
  return response["embeddings"][0]
58
74
 
@@ -60,7 +76,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
60
76
  async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
61
77
  embedding_model = self.load_model(async_mode=True)
62
78
  response = await embedding_model.embed(
63
- model=self.model_name, input=texts, **self.generation_kwargs
79
+ model=self.name, input=texts, **self.generation_kwargs
64
80
  )
65
81
  return response["embeddings"]
66
82
 
@@ -69,12 +85,18 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
69
85
  ###############################################
70
86
 
71
87
  def load_model(self, async_mode: bool = False):
88
+ ollama = require_dependency(
89
+ "ollama",
90
+ provider_label="OllamaEmbeddingModel",
91
+ install_hint="Install it with `pip install ollama`.",
92
+ )
93
+
72
94
  if not async_mode:
73
- return self._build_client(Client)
74
- return self._build_client(AsyncClient)
95
+ return self._build_client(ollama.Client)
96
+ return self._build_client(ollama.AsyncClient)
75
97
 
76
98
  def _build_client(self, cls):
77
- return cls(host=self.host, **self.client_kwargs)
99
+ return cls(host=self.base_url, **self.kwargs)
78
100
 
79
101
  def get_model_name(self):
80
- return f"{self.model_name} (Ollama)"
102
+ return f"{self.name} (Ollama)"
@@ -3,7 +3,10 @@ from openai import OpenAI, AsyncOpenAI
3
3
  from pydantic import SecretStr
4
4
 
5
5
  from deepeval.config.settings import get_settings
6
- from deepeval.models.utils import require_secret_api_key
6
+ from deepeval.models.utils import (
7
+ require_secret_api_key,
8
+ normalize_kwargs_and_extract_aliases,
9
+ )
7
10
  from deepeval.models import DeepEvalBaseEmbeddingModel
8
11
  from deepeval.models.retry_policy import (
9
12
  create_retry_decorator,
@@ -19,37 +22,53 @@ valid_openai_embedding_models = [
19
22
  "text-embedding-3-large",
20
23
  "text-embedding-ada-002",
21
24
  ]
25
+
22
26
  default_openai_embedding_model = "text-embedding-3-small"
23
27
 
28
+ _ALIAS_MAP = {
29
+ "api_key": ["openai_api_key"],
30
+ }
31
+
24
32
 
25
33
  class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
26
34
 
27
35
  def __init__(
28
36
  self,
29
37
  model: Optional[str] = None,
30
- openai_api_key: Optional[str] = None,
38
+ api_key: Optional[str] = None,
31
39
  generation_kwargs: Optional[Dict] = None,
32
- **client_kwargs,
40
+ **kwargs,
33
41
  ):
34
- if openai_api_key is not None:
42
+ normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
43
+ "OpenAIEmbeddingModel",
44
+ kwargs,
45
+ _ALIAS_MAP,
46
+ )
47
+
48
+ # re-map deprecated keywords to renamed positional args
49
+ if api_key is None and "api_key" in alias_values:
50
+ api_key = alias_values["api_key"]
51
+
52
+ if api_key is not None:
35
53
  # keep it secret, keep it safe from serializings, logging and alike
36
- self.openai_api_key: SecretStr | None = SecretStr(openai_api_key)
54
+ self.api_key: SecretStr | None = SecretStr(api_key)
37
55
  else:
38
- self.openai_api_key = get_settings().OPENAI_API_KEY
56
+ self.api_key = get_settings().OPENAI_API_KEY
39
57
 
40
- self.model_name = model if model else default_openai_embedding_model
41
- if self.model_name not in valid_openai_embedding_models:
58
+ model = model if model else default_openai_embedding_model
59
+ if model not in valid_openai_embedding_models:
42
60
  raise ValueError(
43
61
  f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
44
62
  )
45
- self.client_kwargs = client_kwargs or {}
63
+ self.kwargs = normalized_kwargs
46
64
  self.generation_kwargs = generation_kwargs or {}
65
+ super().__init__(model)
47
66
 
48
67
  @retry_openai
49
68
  def embed_text(self, text: str) -> List[float]:
50
69
  client = self.load_model(async_mode=False)
51
70
  response = client.embeddings.create(
52
- input=text, model=self.model_name, **self.generation_kwargs
71
+ input=text, model=self.name, **self.generation_kwargs
53
72
  )
54
73
  return response.data[0].embedding
55
74
 
@@ -57,7 +76,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
57
76
  def embed_texts(self, texts: List[str]) -> List[List[float]]:
58
77
  client = self.load_model(async_mode=False)
59
78
  response = client.embeddings.create(
60
- input=texts, model=self.model_name, **self.generation_kwargs
79
+ input=texts, model=self.name, **self.generation_kwargs
61
80
  )
62
81
  return [item.embedding for item in response.data]
63
82
 
@@ -65,7 +84,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
65
84
  async def a_embed_text(self, text: str) -> List[float]:
66
85
  client = self.load_model(async_mode=True)
67
86
  response = await client.embeddings.create(
68
- input=text, model=self.model_name, **self.generation_kwargs
87
+ input=text, model=self.name, **self.generation_kwargs
69
88
  )
70
89
  return response.data[0].embedding
71
90
 
@@ -73,7 +92,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
73
92
  async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
74
93
  client = self.load_model(async_mode=True)
75
94
  response = await client.embeddings.create(
76
- input=texts, model=self.model_name, **self.generation_kwargs
95
+ input=texts, model=self.name, **self.generation_kwargs
77
96
  )
78
97
  return [item.embedding for item in response.data]
79
98
 
@@ -81,28 +100,25 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
81
100
  # Model
82
101
  ###############################################
83
102
 
84
- def get_model_name(self):
85
- return self.model_name
86
-
87
103
  def load_model(self, async_mode: bool = False):
88
104
  if not async_mode:
89
105
  return self._build_client(OpenAI)
90
106
  return self._build_client(AsyncOpenAI)
91
107
 
92
108
  def _build_client(self, cls):
93
- openai_api_key = require_secret_api_key(
94
- self.openai_api_key,
109
+ api_key = require_secret_api_key(
110
+ self.api_key,
95
111
  provider_label="OpenAI",
96
112
  env_var_name="OPENAI_API_KEY",
97
- param_hint="`openai_api_key` to OpenAIEmbeddingModel(...)",
113
+ param_hint="`api_key` to OpenAIEmbeddingModel(...)",
98
114
  )
99
115
 
100
- client_kwargs = self.client_kwargs.copy()
116
+ client_kwargs = self.kwargs.copy()
101
117
  if not sdk_retries_for(PS.OPENAI):
102
118
  client_kwargs["max_retries"] = 0
103
119
 
104
120
  client_init_kwargs = dict(
105
- api_key=openai_api_key,
121
+ api_key=api_key,
106
122
  **client_kwargs,
107
123
  )
108
124
  try:
@@ -113,3 +129,6 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
113
129
  client_init_kwargs.pop("max_retries", None)
114
130
  return cls(**client_init_kwargs)
115
131
  raise
132
+
133
+ def get_model_name(self):
134
+ return f"{self.name} (OpenAI)"
@@ -1,5 +1,3 @@
1
- import asyncio
2
-
3
1
  from typing import Optional, Tuple, Union, Dict
4
2
  from contextlib import AsyncExitStack
5
3
  from pydantic import BaseModel
@@ -76,6 +74,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
76
74
  async def a_generate(
77
75
  self, prompt: str, schema: Optional[BaseModel] = None
78
76
  ) -> Tuple[Union[str, Dict], float]:
77
+
79
78
  try:
80
79
  payload = self.get_converse_request_body(prompt)
81
80
  client = await self._ensure_client()
@@ -1,7 +1,6 @@
1
1
  import warnings
2
2
 
3
3
  from typing import Optional, Tuple, Union, Dict
4
- from anthropic import Anthropic, AsyncAnthropic
5
4
  from pydantic import BaseModel, SecretStr
6
5
 
7
6
  from deepeval.models import DeepEvalBaseLLM
@@ -10,10 +9,13 @@ from deepeval.models.retry_policy import (
10
9
  create_retry_decorator,
11
10
  sdk_retries_for,
12
11
  )
13
- from deepeval.models.utils import parse_model_name, require_secret_api_key
12
+ from deepeval.models.utils import (
13
+ require_secret_api_key,
14
+ normalize_kwargs_and_extract_aliases,
15
+ )
14
16
  from deepeval.config.settings import get_settings
15
17
  from deepeval.constants import ProviderSlug as PS
16
-
18
+ from deepeval.utils import require_dependency
17
19
 
18
20
  # consistent retry rules
19
21
  retry_anthropic = create_retry_decorator(PS.ANTHROPIC)
@@ -30,33 +32,44 @@ model_pricing = {
30
32
  "claude-instant-1.2": {"input": 0.80 / 1e6, "output": 2.40 / 1e6},
31
33
  }
32
34
 
35
+ _ALIAS_MAP = {
36
+ "api_key": ["_anthropic_api_key"],
37
+ }
38
+
33
39
 
34
40
  class AnthropicModel(DeepEvalBaseLLM):
35
41
  def __init__(
36
42
  self,
37
43
  model: str = "claude-3-7-sonnet-latest",
44
+ api_key: Optional[str] = None,
38
45
  temperature: float = 0,
39
- _anthropic_api_key: Optional[str] = None,
40
46
  generation_kwargs: Optional[Dict] = None,
41
47
  **kwargs,
42
48
  ):
43
- model_name = parse_model_name(model)
49
+ normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
50
+ "AnthropicModel",
51
+ kwargs,
52
+ _ALIAS_MAP,
53
+ )
54
+
55
+ # re-map deprecated keywords to renamed positional args
56
+ if api_key is None and "api_key" in alias_values:
57
+ api_key = alias_values["api_key"]
44
58
 
45
- if _anthropic_api_key is not None:
59
+ if api_key is not None:
46
60
  # keep it secret, keep it safe from serializings, logging and alike
47
- self._anthropic_api_key: SecretStr | None = SecretStr(
48
- _anthropic_api_key
49
- )
61
+ self.api_key: SecretStr | None = SecretStr(api_key)
50
62
  else:
51
- self._anthropic_api_key = get_settings().ANTHROPIC_API_KEY
63
+ self.api_key = get_settings().ANTHROPIC_API_KEY
52
64
 
53
65
  if temperature < 0:
54
66
  raise ValueError("Temperature must be >= 0.")
55
67
  self.temperature = temperature
56
68
 
57
- self.kwargs = kwargs
69
+ # Keep sanitized kwargs for client call to strip legacy keys
70
+ self.kwargs = normalized_kwargs
58
71
  self.generation_kwargs = generation_kwargs or {}
59
- super().__init__(model_name)
72
+ super().__init__(model)
60
73
 
61
74
  ###############################################
62
75
  # Generate functions
@@ -66,6 +79,7 @@ class AnthropicModel(DeepEvalBaseLLM):
66
79
  def generate(
67
80
  self, prompt: str, schema: Optional[BaseModel] = None
68
81
  ) -> Tuple[Union[str, Dict], float]:
82
+
69
83
  chat_model = self.load_model()
70
84
  message = chat_model.messages.create(
71
85
  max_tokens=1024,
@@ -75,7 +89,7 @@ class AnthropicModel(DeepEvalBaseLLM):
75
89
  "content": prompt,
76
90
  }
77
91
  ],
78
- model=self.model_name,
92
+ model=self.name,
79
93
  temperature=self.temperature,
80
94
  **self.generation_kwargs,
81
95
  )
@@ -92,6 +106,7 @@ class AnthropicModel(DeepEvalBaseLLM):
92
106
  async def a_generate(
93
107
  self, prompt: str, schema: Optional[BaseModel] = None
94
108
  ) -> Tuple[str, float]:
109
+
95
110
  chat_model = self.load_model(async_mode=True)
96
111
  message = await chat_model.messages.create(
97
112
  max_tokens=1024,
@@ -101,7 +116,7 @@ class AnthropicModel(DeepEvalBaseLLM):
101
116
  "content": prompt,
102
117
  }
103
118
  ],
104
- model=self.model_name,
119
+ model=self.name,
105
120
  temperature=self.temperature,
106
121
  **self.generation_kwargs,
107
122
  )
@@ -120,7 +135,7 @@ class AnthropicModel(DeepEvalBaseLLM):
120
135
  ###############################################
121
136
 
122
137
  def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
123
- pricing = model_pricing.get(self.model_name)
138
+ pricing = model_pricing.get(self.name)
124
139
 
125
140
  if pricing is None:
126
141
  # Calculate average cost from all known models
@@ -133,7 +148,7 @@ class AnthropicModel(DeepEvalBaseLLM):
133
148
  pricing = {"input": avg_input_cost, "output": avg_output_cost}
134
149
 
135
150
  warnings.warn(
136
- f"[Warning] Pricing not defined for model '{self.model_name}'. "
151
+ f"[Warning] Pricing not defined for model '{self.name}'. "
137
152
  "Using average input/output token costs from existing model_pricing."
138
153
  )
139
154
 
@@ -146,12 +161,15 @@ class AnthropicModel(DeepEvalBaseLLM):
146
161
  ###############################################
147
162
 
148
163
  def load_model(self, async_mode: bool = False):
149
- if not async_mode:
150
- return self._build_client(Anthropic)
151
- return self._build_client(AsyncAnthropic)
164
+ module = require_dependency(
165
+ "anthropic",
166
+ provider_label="AnthropicModel",
167
+ install_hint="Install it with `pip install anthropic`.",
168
+ )
152
169
 
153
- def get_model_name(self):
154
- return f"{self.model_name}"
170
+ if not async_mode:
171
+ return self._build_client(module.Anthropic)
172
+ return self._build_client(module.AsyncAnthropic)
155
173
 
156
174
  def _client_kwargs(self) -> Dict:
157
175
  kwargs = dict(self.kwargs or {})
@@ -163,10 +181,10 @@ class AnthropicModel(DeepEvalBaseLLM):
163
181
 
164
182
  def _build_client(self, cls):
165
183
  api_key = require_secret_api_key(
166
- self._anthropic_api_key,
184
+ self.api_key,
167
185
  provider_label="Anthropic",
168
186
  env_var_name="ANTHROPIC_API_KEY",
169
- param_hint="`_anthropic_api_key` to AnthropicModel(...)",
187
+ param_hint="`api_key` to AnthropicModel(...)",
170
188
  )
171
189
  kw = dict(
172
190
  api_key=api_key,
@@ -180,3 +198,6 @@ class AnthropicModel(DeepEvalBaseLLM):
180
198
  kw.pop("max_retries", None)
181
199
  return cls(**kw)
182
200
  raise
201
+
202
+ def get_model_name(self):
203
+ return f"{self.name} (Anthropic)"