deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/test.py +1 -1
  3. deepeval/config/settings.py +102 -13
  4. deepeval/dataset/golden.py +54 -2
  5. deepeval/evaluate/configs.py +1 -1
  6. deepeval/evaluate/evaluate.py +16 -8
  7. deepeval/evaluate/execute.py +74 -27
  8. deepeval/evaluate/utils.py +26 -22
  9. deepeval/integrations/pydantic_ai/agent.py +19 -2
  10. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  11. deepeval/metrics/__init__.py +14 -12
  12. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  13. deepeval/metrics/answer_relevancy/template.py +188 -92
  14. deepeval/metrics/argument_correctness/template.py +2 -2
  15. deepeval/metrics/base_metric.py +2 -5
  16. deepeval/metrics/bias/template.py +3 -3
  17. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  18. deepeval/metrics/contextual_precision/template.py +115 -66
  19. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  20. deepeval/metrics/contextual_recall/template.py +106 -55
  21. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  22. deepeval/metrics/contextual_relevancy/template.py +87 -58
  23. deepeval/metrics/conversation_completeness/template.py +2 -2
  24. deepeval/metrics/conversational_dag/templates.py +4 -4
  25. deepeval/metrics/conversational_g_eval/template.py +4 -3
  26. deepeval/metrics/dag/templates.py +5 -5
  27. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  28. deepeval/metrics/faithfulness/schema.py +1 -1
  29. deepeval/metrics/faithfulness/template.py +200 -115
  30. deepeval/metrics/g_eval/utils.py +2 -2
  31. deepeval/metrics/hallucination/template.py +4 -4
  32. deepeval/metrics/indicator.py +4 -4
  33. deepeval/metrics/misuse/template.py +2 -2
  34. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  35. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  36. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  37. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  38. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  39. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  40. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  41. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  42. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  43. deepeval/metrics/non_advice/template.py +2 -2
  44. deepeval/metrics/pii_leakage/template.py +2 -2
  45. deepeval/metrics/prompt_alignment/template.py +4 -4
  46. deepeval/metrics/ragas.py +3 -3
  47. deepeval/metrics/role_violation/template.py +2 -2
  48. deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  49. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  50. deepeval/metrics/toxicity/template.py +4 -4
  51. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  52. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  53. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  54. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  55. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  56. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  57. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  58. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  59. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  60. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  61. deepeval/metrics/turn_faithfulness/template.py +218 -0
  62. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  63. deepeval/metrics/turn_relevancy/template.py +2 -2
  64. deepeval/metrics/utils.py +39 -58
  65. deepeval/models/__init__.py +0 -12
  66. deepeval/models/base_model.py +16 -38
  67. deepeval/models/embedding_models/__init__.py +7 -0
  68. deepeval/models/embedding_models/azure_embedding_model.py +69 -32
  69. deepeval/models/embedding_models/local_embedding_model.py +39 -22
  70. deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
  71. deepeval/models/embedding_models/openai_embedding_model.py +50 -15
  72. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  73. deepeval/models/llms/anthropic_model.py +53 -20
  74. deepeval/models/llms/azure_model.py +140 -43
  75. deepeval/models/llms/deepseek_model.py +38 -23
  76. deepeval/models/llms/gemini_model.py +222 -103
  77. deepeval/models/llms/grok_model.py +39 -27
  78. deepeval/models/llms/kimi_model.py +39 -23
  79. deepeval/models/llms/litellm_model.py +103 -45
  80. deepeval/models/llms/local_model.py +35 -22
  81. deepeval/models/llms/ollama_model.py +129 -17
  82. deepeval/models/llms/openai_model.py +151 -50
  83. deepeval/models/llms/portkey_model.py +149 -0
  84. deepeval/models/llms/utils.py +5 -3
  85. deepeval/models/retry_policy.py +17 -14
  86. deepeval/models/utils.py +94 -4
  87. deepeval/optimizer/__init__.py +5 -0
  88. deepeval/optimizer/algorithms/__init__.py +6 -0
  89. deepeval/optimizer/algorithms/base.py +29 -0
  90. deepeval/optimizer/algorithms/configs.py +18 -0
  91. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  92. deepeval/optimizer/algorithms/copro/copro.py +836 -0
  93. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  94. deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
  95. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  96. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  97. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  98. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  99. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  100. deepeval/optimizer/algorithms/simba/simba.py +999 -0
  101. deepeval/optimizer/algorithms/simba/types.py +15 -0
  102. deepeval/optimizer/configs.py +31 -0
  103. deepeval/optimizer/policies.py +227 -0
  104. deepeval/optimizer/prompt_optimizer.py +263 -0
  105. deepeval/optimizer/rewriter/__init__.py +5 -0
  106. deepeval/optimizer/rewriter/rewriter.py +124 -0
  107. deepeval/optimizer/rewriter/utils.py +214 -0
  108. deepeval/optimizer/scorer/__init__.py +5 -0
  109. deepeval/optimizer/scorer/base.py +86 -0
  110. deepeval/optimizer/scorer/scorer.py +316 -0
  111. deepeval/optimizer/scorer/utils.py +30 -0
  112. deepeval/optimizer/types.py +148 -0
  113. deepeval/optimizer/utils.py +480 -0
  114. deepeval/prompt/prompt.py +7 -6
  115. deepeval/test_case/__init__.py +1 -3
  116. deepeval/test_case/api.py +12 -10
  117. deepeval/test_case/conversational_test_case.py +19 -1
  118. deepeval/test_case/llm_test_case.py +152 -1
  119. deepeval/test_case/utils.py +4 -8
  120. deepeval/test_run/api.py +15 -14
  121. deepeval/test_run/cache.py +2 -0
  122. deepeval/test_run/test_run.py +9 -4
  123. deepeval/tracing/patchers.py +9 -4
  124. deepeval/tracing/tracing.py +2 -2
  125. deepeval/utils.py +89 -0
  126. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  127. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
  128. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  129. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  130. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  131. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  132. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  133. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  134. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  135. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  136. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  137. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  138. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  139. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  140. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  141. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  142. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  143. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  144. deepeval/models/mlllms/__init__.py +0 -4
  145. deepeval/models/mlllms/azure_model.py +0 -334
  146. deepeval/models/mlllms/gemini_model.py +0 -284
  147. deepeval/models/mlllms/ollama_model.py +0 -144
  148. deepeval/models/mlllms/openai_model.py +0 -258
  149. deepeval/test_case/mllm_test_case.py +0 -170
  150. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  152. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  153. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  154. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  155. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  156. {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
@@ -1,8 +1,11 @@
1
- from ollama import Client, AsyncClient
2
1
  from typing import List, Optional, Dict
3
2
 
4
- from deepeval.key_handler import EmbeddingKeyValues, KEY_FILE_HANDLER
3
+ from deepeval.config.settings import get_settings
4
+ from deepeval.utils import require_dependency
5
5
  from deepeval.models import DeepEvalBaseEmbeddingModel
6
+ from deepeval.models.utils import (
7
+ normalize_kwargs_and_extract_aliases,
8
+ )
6
9
  from deepeval.models.retry_policy import (
7
10
  create_retry_decorator,
8
11
  )
@@ -11,30 +14,45 @@ from deepeval.constants import ProviderSlug as PS
11
14
 
12
15
  retry_ollama = create_retry_decorator(PS.OLLAMA)
13
16
 
17
+ _ALIAS_MAP = {"base_url": ["host"]}
18
+
14
19
 
15
20
  class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
16
21
  def __init__(
17
22
  self,
18
23
  model: Optional[str] = None,
19
- host: Optional[str] = None,
24
+ base_url: Optional[str] = None,
20
25
  generation_kwargs: Optional[Dict] = None,
21
- **client_kwargs,
26
+ **kwargs,
22
27
  ):
23
- self.host = host or KEY_FILE_HANDLER.fetch_data(
24
- EmbeddingKeyValues.LOCAL_EMBEDDING_BASE_URL
28
+ normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
29
+ "OllamaEmbeddingModel",
30
+ kwargs,
31
+ _ALIAS_MAP,
25
32
  )
26
- self.model_name = model or KEY_FILE_HANDLER.fetch_data(
27
- EmbeddingKeyValues.LOCAL_EMBEDDING_MODEL_NAME
33
+
34
+ # re-map deprecated keywords to renamed positional args
35
+ if base_url is None and "base_url" in alias_values:
36
+ base_url = alias_values["base_url"]
37
+
38
+ settings = get_settings()
39
+
40
+ self.base_url = (
41
+ base_url
42
+ or settings.LOCAL_EMBEDDING_BASE_URL
43
+ and str(settings.LOCAL_EMBEDDING_BASE_URL)
28
44
  )
29
- self.client_kwargs = client_kwargs or {}
45
+ model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
46
+ # Keep sanitized kwargs for client call to strip legacy keys
47
+ self.kwargs = normalized_kwargs
30
48
  self.generation_kwargs = generation_kwargs or {}
31
- super().__init__(self.model_name)
49
+ super().__init__(model)
32
50
 
33
51
  @retry_ollama
34
52
  def embed_text(self, text: str) -> List[float]:
35
53
  embedding_model = self.load_model()
36
54
  response = embedding_model.embed(
37
- model=self.model_name, input=text, **self.generation_kwargs
55
+ model=self.name, input=text, **self.generation_kwargs
38
56
  )
39
57
  return response["embeddings"][0]
40
58
 
@@ -42,7 +60,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
42
60
  def embed_texts(self, texts: List[str]) -> List[List[float]]:
43
61
  embedding_model = self.load_model()
44
62
  response = embedding_model.embed(
45
- model=self.model_name, input=texts, **self.generation_kwargs
63
+ model=self.name, input=texts, **self.generation_kwargs
46
64
  )
47
65
  return response["embeddings"]
48
66
 
@@ -50,7 +68,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
50
68
  async def a_embed_text(self, text: str) -> List[float]:
51
69
  embedding_model = self.load_model(async_mode=True)
52
70
  response = await embedding_model.embed(
53
- model=self.model_name, input=text, **self.generation_kwargs
71
+ model=self.name, input=text, **self.generation_kwargs
54
72
  )
55
73
  return response["embeddings"][0]
56
74
 
@@ -58,7 +76,7 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
58
76
  async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
59
77
  embedding_model = self.load_model(async_mode=True)
60
78
  response = await embedding_model.embed(
61
- model=self.model_name, input=texts, **self.generation_kwargs
79
+ model=self.name, input=texts, **self.generation_kwargs
62
80
  )
63
81
  return response["embeddings"]
64
82
 
@@ -67,12 +85,18 @@ class OllamaEmbeddingModel(DeepEvalBaseEmbeddingModel):
67
85
  ###############################################
68
86
 
69
87
  def load_model(self, async_mode: bool = False):
88
+ ollama = require_dependency(
89
+ "ollama",
90
+ provider_label="OllamaEmbeddingModel",
91
+ install_hint="Install it with `pip install ollama`.",
92
+ )
93
+
70
94
  if not async_mode:
71
- return self._build_client(Client)
72
- return self._build_client(AsyncClient)
95
+ return self._build_client(ollama.Client)
96
+ return self._build_client(ollama.AsyncClient)
73
97
 
74
98
  def _build_client(self, cls):
75
- return cls(host=self.host, **self.client_kwargs)
99
+ return cls(host=self.base_url, **self.kwargs)
76
100
 
77
101
  def get_model_name(self):
78
- return f"{self.model_name} (Ollama)"
102
+ return f"{self.name} (Ollama)"
@@ -1,5 +1,12 @@
1
1
  from typing import Dict, Optional, List
2
2
  from openai import OpenAI, AsyncOpenAI
3
+ from pydantic import SecretStr
4
+
5
+ from deepeval.config.settings import get_settings
6
+ from deepeval.models.utils import (
7
+ require_secret_api_key,
8
+ normalize_kwargs_and_extract_aliases,
9
+ )
3
10
  from deepeval.models import DeepEvalBaseEmbeddingModel
4
11
  from deepeval.models.retry_policy import (
5
12
  create_retry_decorator,
@@ -15,32 +22,53 @@ valid_openai_embedding_models = [
15
22
  "text-embedding-3-large",
16
23
  "text-embedding-ada-002",
17
24
  ]
25
+
18
26
  default_openai_embedding_model = "text-embedding-3-small"
19
27
 
28
+ _ALIAS_MAP = {
29
+ "api_key": ["openai_api_key"],
30
+ }
31
+
20
32
 
21
33
  class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
22
34
 
23
35
  def __init__(
24
36
  self,
25
37
  model: Optional[str] = None,
26
- openai_api_key: Optional[str] = None,
38
+ api_key: Optional[str] = None,
27
39
  generation_kwargs: Optional[Dict] = None,
28
- **client_kwargs,
40
+ **kwargs,
29
41
  ):
30
- self.openai_api_key = openai_api_key
31
- self.model_name = model if model else default_openai_embedding_model
32
- if self.model_name not in valid_openai_embedding_models:
42
+ normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
43
+ "OpenAIEmbeddingModel",
44
+ kwargs,
45
+ _ALIAS_MAP,
46
+ )
47
+
48
+ # re-map deprecated keywords to renamed positional args
49
+ if api_key is None and "api_key" in alias_values:
50
+ api_key = alias_values["api_key"]
51
+
52
+ if api_key is not None:
53
+ # keep it secret, keep it safe from serialization, logging and the like
54
+ self.api_key: SecretStr | None = SecretStr(api_key)
55
+ else:
56
+ self.api_key = get_settings().OPENAI_API_KEY
57
+
58
+ model = model if model else default_openai_embedding_model
59
+ if model not in valid_openai_embedding_models:
33
60
  raise ValueError(
34
61
  f"Invalid model. Available OpenAI Embedding models: {', '.join(valid_openai_embedding_models)}"
35
62
  )
36
- self.client_kwargs = client_kwargs or {}
63
+ self.kwargs = normalized_kwargs
37
64
  self.generation_kwargs = generation_kwargs or {}
65
+ super().__init__(model)
38
66
 
39
67
  @retry_openai
40
68
  def embed_text(self, text: str) -> List[float]:
41
69
  client = self.load_model(async_mode=False)
42
70
  response = client.embeddings.create(
43
- input=text, model=self.model_name, **self.generation_kwargs
71
+ input=text, model=self.name, **self.generation_kwargs
44
72
  )
45
73
  return response.data[0].embedding
46
74
 
@@ -48,7 +76,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
48
76
  def embed_texts(self, texts: List[str]) -> List[List[float]]:
49
77
  client = self.load_model(async_mode=False)
50
78
  response = client.embeddings.create(
51
- input=texts, model=self.model_name, **self.generation_kwargs
79
+ input=texts, model=self.name, **self.generation_kwargs
52
80
  )
53
81
  return [item.embedding for item in response.data]
54
82
 
@@ -56,7 +84,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
56
84
  async def a_embed_text(self, text: str) -> List[float]:
57
85
  client = self.load_model(async_mode=True)
58
86
  response = await client.embeddings.create(
59
- input=text, model=self.model_name, **self.generation_kwargs
87
+ input=text, model=self.name, **self.generation_kwargs
60
88
  )
61
89
  return response.data[0].embedding
62
90
 
@@ -64,7 +92,7 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
64
92
  async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
65
93
  client = self.load_model(async_mode=True)
66
94
  response = await client.embeddings.create(
67
- input=texts, model=self.model_name, **self.generation_kwargs
95
+ input=texts, model=self.name, **self.generation_kwargs
68
96
  )
69
97
  return [item.embedding for item in response.data]
70
98
 
@@ -72,21 +100,25 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
72
100
  # Model
73
101
  ###############################################
74
102
 
75
- def get_model_name(self):
76
- return self.model_name
77
-
78
103
  def load_model(self, async_mode: bool = False):
79
104
  if not async_mode:
80
105
  return self._build_client(OpenAI)
81
106
  return self._build_client(AsyncOpenAI)
82
107
 
83
108
  def _build_client(self, cls):
84
- client_kwargs = self.client_kwargs.copy()
109
+ api_key = require_secret_api_key(
110
+ self.api_key,
111
+ provider_label="OpenAI",
112
+ env_var_name="OPENAI_API_KEY",
113
+ param_hint="`api_key` to OpenAIEmbeddingModel(...)",
114
+ )
115
+
116
+ client_kwargs = self.kwargs.copy()
85
117
  if not sdk_retries_for(PS.OPENAI):
86
118
  client_kwargs["max_retries"] = 0
87
119
 
88
120
  client_init_kwargs = dict(
89
- api_key=self.openai_api_key,
121
+ api_key=api_key,
90
122
  **client_kwargs,
91
123
  )
92
124
  try:
@@ -97,3 +129,6 @@ class OpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
97
129
  client_init_kwargs.pop("max_retries", None)
98
130
  return cls(**client_init_kwargs)
99
131
  raise
132
+
133
+ def get_model_name(self):
134
+ return f"{self.name} (OpenAI)"
@@ -1,5 +1,3 @@
1
- import asyncio
2
-
3
1
  from typing import Optional, Tuple, Union, Dict
4
2
  from contextlib import AsyncExitStack
5
3
  from pydantic import BaseModel
@@ -76,6 +74,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
76
74
  async def a_generate(
77
75
  self, prompt: str, schema: Optional[BaseModel] = None
78
76
  ) -> Tuple[Union[str, Dict], float]:
77
+
79
78
  try:
80
79
  payload = self.get_converse_request_body(prompt)
81
80
  client = await self._ensure_client()
@@ -1,8 +1,7 @@
1
1
  import warnings
2
2
 
3
3
  from typing import Optional, Tuple, Union, Dict
4
- from anthropic import Anthropic, AsyncAnthropic
5
- from pydantic import BaseModel
4
+ from pydantic import BaseModel, SecretStr
6
5
 
7
6
  from deepeval.models import DeepEvalBaseLLM
8
7
  from deepeval.models.llms.utils import trim_and_load_json
@@ -10,10 +9,13 @@ from deepeval.models.retry_policy import (
10
9
  create_retry_decorator,
11
10
  sdk_retries_for,
12
11
  )
13
- from deepeval.models.utils import parse_model_name
12
+ from deepeval.models.utils import (
13
+ require_secret_api_key,
14
+ normalize_kwargs_and_extract_aliases,
15
+ )
14
16
  from deepeval.config.settings import get_settings
15
17
  from deepeval.constants import ProviderSlug as PS
16
-
18
+ from deepeval.utils import require_dependency
17
19
 
18
20
  # consistent retry rules
19
21
  retry_anthropic = create_retry_decorator(PS.ANTHROPIC)
@@ -30,26 +32,44 @@ model_pricing = {
30
32
  "claude-instant-1.2": {"input": 0.80 / 1e6, "output": 2.40 / 1e6},
31
33
  }
32
34
 
35
+ _ALIAS_MAP = {
36
+ "api_key": ["_anthropic_api_key"],
37
+ }
38
+
33
39
 
34
40
  class AnthropicModel(DeepEvalBaseLLM):
35
41
  def __init__(
36
42
  self,
37
43
  model: str = "claude-3-7-sonnet-latest",
44
+ api_key: Optional[str] = None,
38
45
  temperature: float = 0,
39
- _anthropic_api_key: Optional[str] = None,
40
46
  generation_kwargs: Optional[Dict] = None,
41
47
  **kwargs,
42
48
  ):
43
- model_name = parse_model_name(model)
44
- self._anthropic_api_key = _anthropic_api_key
49
+ normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
50
+ "AnthropicModel",
51
+ kwargs,
52
+ _ALIAS_MAP,
53
+ )
54
+
55
+ # re-map deprecated keywords to renamed positional args
56
+ if api_key is None and "api_key" in alias_values:
57
+ api_key = alias_values["api_key"]
58
+
59
+ if api_key is not None:
60
+ # keep it secret, keep it safe from serialization, logging and the like
61
+ self.api_key: SecretStr | None = SecretStr(api_key)
62
+ else:
63
+ self.api_key = get_settings().ANTHROPIC_API_KEY
45
64
 
46
65
  if temperature < 0:
47
66
  raise ValueError("Temperature must be >= 0.")
48
67
  self.temperature = temperature
49
68
 
50
- self.kwargs = kwargs
69
+ # Keep sanitized kwargs for client call to strip legacy keys
70
+ self.kwargs = normalized_kwargs
51
71
  self.generation_kwargs = generation_kwargs or {}
52
- super().__init__(model_name)
72
+ super().__init__(model)
53
73
 
54
74
  ###############################################
55
75
  # Generate functions
@@ -59,6 +79,7 @@ class AnthropicModel(DeepEvalBaseLLM):
59
79
  def generate(
60
80
  self, prompt: str, schema: Optional[BaseModel] = None
61
81
  ) -> Tuple[Union[str, Dict], float]:
82
+
62
83
  chat_model = self.load_model()
63
84
  message = chat_model.messages.create(
64
85
  max_tokens=1024,
@@ -68,7 +89,7 @@ class AnthropicModel(DeepEvalBaseLLM):
68
89
  "content": prompt,
69
90
  }
70
91
  ],
71
- model=self.model_name,
92
+ model=self.name,
72
93
  temperature=self.temperature,
73
94
  **self.generation_kwargs,
74
95
  )
@@ -85,6 +106,7 @@ class AnthropicModel(DeepEvalBaseLLM):
85
106
  async def a_generate(
86
107
  self, prompt: str, schema: Optional[BaseModel] = None
87
108
  ) -> Tuple[str, float]:
109
+
88
110
  chat_model = self.load_model(async_mode=True)
89
111
  message = await chat_model.messages.create(
90
112
  max_tokens=1024,
@@ -94,7 +116,7 @@ class AnthropicModel(DeepEvalBaseLLM):
94
116
  "content": prompt,
95
117
  }
96
118
  ],
97
- model=self.model_name,
119
+ model=self.name,
98
120
  temperature=self.temperature,
99
121
  **self.generation_kwargs,
100
122
  )
@@ -113,7 +135,7 @@ class AnthropicModel(DeepEvalBaseLLM):
113
135
  ###############################################
114
136
 
115
137
  def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
116
- pricing = model_pricing.get(self.model_name)
138
+ pricing = model_pricing.get(self.name)
117
139
 
118
140
  if pricing is None:
119
141
  # Calculate average cost from all known models
@@ -126,7 +148,7 @@ class AnthropicModel(DeepEvalBaseLLM):
126
148
  pricing = {"input": avg_input_cost, "output": avg_output_cost}
127
149
 
128
150
  warnings.warn(
129
- f"[Warning] Pricing not defined for model '{self.model_name}'. "
151
+ f"[Warning] Pricing not defined for model '{self.name}'. "
130
152
  "Using average input/output token costs from existing model_pricing."
131
153
  )
132
154
 
@@ -139,12 +161,15 @@ class AnthropicModel(DeepEvalBaseLLM):
139
161
  ###############################################
140
162
 
141
163
  def load_model(self, async_mode: bool = False):
142
- if not async_mode:
143
- return self._build_client(Anthropic)
144
- return self._build_client(AsyncAnthropic)
164
+ module = require_dependency(
165
+ "anthropic",
166
+ provider_label="AnthropicModel",
167
+ install_hint="Install it with `pip install anthropic`.",
168
+ )
145
169
 
146
- def get_model_name(self):
147
- return f"{self.model_name}"
170
+ if not async_mode:
171
+ return self._build_client(module.Anthropic)
172
+ return self._build_client(module.AsyncAnthropic)
148
173
 
149
174
  def _client_kwargs(self) -> Dict:
150
175
  kwargs = dict(self.kwargs or {})
@@ -155,9 +180,14 @@ class AnthropicModel(DeepEvalBaseLLM):
155
180
  return kwargs
156
181
 
157
182
  def _build_client(self, cls):
158
- settings = get_settings()
183
+ api_key = require_secret_api_key(
184
+ self.api_key,
185
+ provider_label="Anthropic",
186
+ env_var_name="ANTHROPIC_API_KEY",
187
+ param_hint="`api_key` to AnthropicModel(...)",
188
+ )
159
189
  kw = dict(
160
- api_key=settings.ANTHROPIC_API_KEY or self._anthropic_api_key,
190
+ api_key=api_key,
161
191
  **self._client_kwargs(),
162
192
  )
163
193
  try:
@@ -168,3 +198,6 @@ class AnthropicModel(DeepEvalBaseLLM):
168
198
  kw.pop("max_retries", None)
169
199
  return cls(**kw)
170
200
  raise
201
+
202
+ def get_model_name(self):
203
+ return f"{self.name} (Anthropic)"