deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff compares publicly available package versions as they were released to their public registry, and is provided for informational purposes only.
- deepeval/_version.py +1 -1
- deepeval/cli/test.py +1 -1
- deepeval/config/settings.py +102 -13
- deepeval/dataset/golden.py +54 -2
- deepeval/evaluate/configs.py +1 -1
- deepeval/evaluate/evaluate.py +16 -8
- deepeval/evaluate/execute.py +74 -27
- deepeval/evaluate/utils.py +26 -22
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/metrics/__init__.py +14 -12
- deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
- deepeval/metrics/answer_relevancy/template.py +188 -92
- deepeval/metrics/argument_correctness/template.py +2 -2
- deepeval/metrics/base_metric.py +2 -5
- deepeval/metrics/bias/template.py +3 -3
- deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/template.py +2 -2
- deepeval/metrics/conversational_dag/templates.py +4 -4
- deepeval/metrics/conversational_g_eval/template.py +4 -3
- deepeval/metrics/dag/templates.py +5 -5
- deepeval/metrics/faithfulness/faithfulness.py +70 -27
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/utils.py +2 -2
- deepeval/metrics/hallucination/template.py +4 -4
- deepeval/metrics/indicator.py +4 -4
- deepeval/metrics/misuse/template.py +2 -2
- deepeval/metrics/multimodal_metrics/__init__.py +0 -18
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
- deepeval/metrics/non_advice/template.py +2 -2
- deepeval/metrics/pii_leakage/template.py +2 -2
- deepeval/metrics/prompt_alignment/template.py +4 -4
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_violation/template.py +2 -2
- deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
- deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
- deepeval/metrics/toxicity/template.py +4 -4
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
- deepeval/metrics/turn_relevancy/template.py +2 -2
- deepeval/metrics/utils.py +39 -58
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +16 -38
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +69 -32
- deepeval/models/embedding_models/local_embedding_model.py +39 -22
- deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
- deepeval/models/embedding_models/openai_embedding_model.py +50 -15
- deepeval/models/llms/amazon_bedrock_model.py +1 -2
- deepeval/models/llms/anthropic_model.py +53 -20
- deepeval/models/llms/azure_model.py +140 -43
- deepeval/models/llms/deepseek_model.py +38 -23
- deepeval/models/llms/gemini_model.py +222 -103
- deepeval/models/llms/grok_model.py +39 -27
- deepeval/models/llms/kimi_model.py +39 -23
- deepeval/models/llms/litellm_model.py +103 -45
- deepeval/models/llms/local_model.py +35 -22
- deepeval/models/llms/ollama_model.py +129 -17
- deepeval/models/llms/openai_model.py +151 -50
- deepeval/models/llms/portkey_model.py +149 -0
- deepeval/models/llms/utils.py +5 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +94 -4
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/optimizer/algorithms/copro/copro.py +836 -0
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/optimizer/algorithms/simba/simba.py +999 -0
- deepeval/optimizer/algorithms/simba/types.py +15 -0
- deepeval/optimizer/configs.py +31 -0
- deepeval/optimizer/policies.py +227 -0
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/optimizer/utils.py +480 -0
- deepeval/prompt/prompt.py +7 -6
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +12 -10
- deepeval/test_case/conversational_test_case.py +19 -1
- deepeval/test_case/llm_test_case.py +152 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +15 -14
- deepeval/test_run/cache.py +2 -0
- deepeval/test_run/test_run.py +9 -4
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +89 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -334
- deepeval/models/mlllms/gemini_model.py +0 -284
- deepeval/models/mlllms/ollama_model.py +0 -144
- deepeval/models/mlllms/openai_model.py +0 -258
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
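A single pattern repeats across the provider model files shown below: per-key `KEY_FILE_HANDLER` lookups are replaced by a central `get_settings()` object, API keys are held as pydantic `SecretStr` values and only unwrapped at client-construction time via a new `require_secret_api_key` helper, and the resolved model name is passed to `super().__init__(model)` and read back as `self.name`. A condensed sketch of that shared constructor shape, using the Kimi/Moonshot names from the diff below (illustrative, not a verbatim excerpt of the package code):

    from typing import Optional
    from pydantic import SecretStr

    from deepeval.config.settings import get_settings


    def resolve_model_and_key(model: Optional[str], api_key: Optional[str]):
        settings = get_settings()
        model = model or settings.MOONSHOT_MODEL_NAME
        # explicit keys are wrapped so logs and reprs show "**********"
        key = SecretStr(api_key) if api_key is not None else settings.MOONSHOT_API_KEY
        return model, key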
deepeval/models/llms/kimi_model.py

@@ -1,13 +1,16 @@
 from typing import Optional, Tuple, Union, Dict
 from openai import OpenAI, AsyncOpenAI
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr

+from deepeval.config.settings import get_settings
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.constants import ProviderSlug as PS

@@ -73,35 +76,39 @@ model_pricing = {
 class KimiModel(DeepEvalBaseLLM):
     def __init__(
         self,
-        api_key: Optional[str] = None,
         model: Optional[str] = None,
+        api_key: Optional[str] = None,
         temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-
-
-
-        if
+        settings = get_settings()
+
+        model = model or settings.MOONSHOT_MODEL_NAME
+        if model not in model_pricing:
             raise ValueError(
                 f"Invalid model. Available Moonshot models: {', '.join(model_pricing.keys())}"
             )
-
-
-        )
+
+        temperature_from_key = settings.TEMPERATURE
         if temperature_from_key is None:
             self.temperature = temperature
         else:
             self.temperature = float(temperature_from_key)
         if self.temperature < 0:
             raise ValueError("Temperature must be >= 0.")
-
-
-
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.MOONSHOT_API_KEY
+
         self.base_url = "https://api.moonshot.cn/v1"
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(
+        super().__init__(model)

     ###############################################
     # Other generate functions

@@ -111,10 +118,11 @@ class KimiModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=False)
-        if schema and self.
+        if schema and self.name in json_mode_models:
             completion = client.chat.completions.create(
-                model=self.
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,

@@ -130,7 +138,7 @@ class KimiModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost

         completion = client.chat.completions.create(
-            model=self.
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             **self.generation_kwargs,
         )

@@ -149,10 +157,11 @@ class KimiModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=True)
-        if schema and self.
+        if schema and self.name in json_mode_models:
             completion = await client.chat.completions.create(
-                model=self.
+                model=self.name,
                 messages=[{"role": "user", "content": prompt}],
                 response_format={"type": "json_object"},
                 temperature=self.temperature,

@@ -168,7 +177,7 @@ class KimiModel(DeepEvalBaseLLM):
             return schema.model_validate(json_output), cost

         completion = await client.chat.completions.create(
-            model=self.
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             **self.generation_kwargs,
         )

@@ -192,7 +201,7 @@ class KimiModel(DeepEvalBaseLLM):
         input_tokens: int,
         output_tokens: int,
     ) -> float:
-        pricing = model_pricing.get(self.
+        pricing = model_pricing.get(self.name, model_pricing)
         input_cost = input_tokens * pricing["input"]
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost

@@ -218,8 +227,15 @@ class KimiModel(DeepEvalBaseLLM):
         return kwargs

     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="Kimi",
+            env_var_name="MOONSHOT_API_KEY",
+            param_hint="`api_key` to KimiModel(...)",
+        )
+
         kw = dict(
-            api_key=
+            api_key=api_key,
             base_url=self.base_url,
             **self._client_kwargs(),
         )

@@ -233,4 +249,4 @@ class KimiModel(DeepEvalBaseLLM):
             raise

     def get_model_name(self):
-        return f"{self.
+        return f"{self.name} (KIMI)"
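The `SecretStr` wrapper is doing real work here: pydantic masks the value in `str()` and `repr()` output, so a stray log line or serialized traceback no longer leaks the key, and callers must unwrap it explicitly. A minimal standalone demonstration (plain pydantic, no deepeval imports):

    from pydantic import SecretStr

    key = SecretStr("sk-super-secret")
    print(key)                     # **********
    print(repr(key))               # SecretStr('**********')
    print(key.get_secret_value())  # sk-super-secret (explicit, greppable access)

The unwrapping is centralized in `require_secret_api_key`, whose body lives in `deepeval/models/utils.py` (+94 in this diff); judging from the call sites, it presumably returns the plain key string and raises a descriptive error naming the env var and constructor parameter when no key was resolved.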
deepeval/models/llms/litellm_model.py

@@ -1,6 +1,6 @@
-from typing import Optional, Tuple, Union, Dict, List, Any
-from pydantic import BaseModel
 import logging
+from typing import Optional, Tuple, Union, Dict, List, Any
+from pydantic import BaseModel, SecretStr
 from tenacity import (
     retry,
     stop_after_attempt,

@@ -8,11 +8,14 @@ from tenacity import (
     wait_exponential_jitter,
     RetryCallState,
 )
-import os

+from deepeval.config.settings import get_settings
+from deepeval.models.utils import (
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER


 def log_retry_error(retry_state: RetryCallState):

@@ -27,6 +30,10 @@ retryable_exceptions = (
     Exception,  # LiteLLM handles specific exceptions internally
 )

+_ALIAS_MAP = {
+    "base_url": ["api_base"],
+}
+

 class LiteLLMModel(DeepEvalBaseLLM):
     EXP_BASE: int = 2

@@ -39,47 +46,66 @@ class LiteLLMModel(DeepEvalBaseLLM):
         self,
         model: Optional[str] = None,
         api_key: Optional[str] = None,
-
+        base_url: Optional[str] = None,
         temperature: float = 0,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        from litellm import completion, acompletion, get_llm_provider

-
-
-
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "LiteLLMModel",
+            kwargs,
+            _ALIAS_MAP,
         )
-
+
+        # re-map depricated keywords to re-named positional args
+        if base_url is None and "base_url" in alias_values:
+            base_url = alias_values["base_url"]
+
+        settings = get_settings()
+        # Get model name from parameter or key file
+        model = model or settings.LITELLM_MODEL_NAME
+        if not model:
             raise ValueError(
                 "Model name must be provided either through parameter or set-litellm command"
             )

-        # Get API key from parameter,
-
-
-
-
-
-
-
-
+        # Get API key from parameter, or settings
+        if api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = (
+                settings.LITELLM_API_KEY
+                or settings.LITELLM_PROXY_API_KEY
+                or settings.OPENAI_API_KEY
+                or settings.ANTHROPIC_API_KEY
+                or settings.GOOGLE_API_KEY
+            )

         # Get API base from parameter, key file, or environment variable
-        self.
-
-            or
-
-
+        self.base_url = (
+            base_url
+            or (
+                str(settings.LITELLM_API_BASE)
+                if settings.LITELLM_API_BASE is not None
+                else None
+            )
+            or (
+                str(settings.LITELLM_PROXY_API_BASE)
+                if settings.LITELLM_PROXY_API_BASE is not None
+                else None
+            )
         )

         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
-
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = normalized_kwargs
         self.generation_kwargs = generation_kwargs or {}
         self.evaluation_cost = 0.0  # Initialize cost to 0.0
-        super().__init__(
+        super().__init__(model)

     @retry(
         wait=wait_exponential_jitter(

@@ -92,18 +118,25 @@ class LiteLLMModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Union[str, Dict, Tuple[str, float]]:
+
         from litellm import completion

         completion_params = {
-            "model": self.
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": self.temperature,
         }

         if self.api_key:
-
-
-
+            api_key = require_secret_api_key(
+                self.api_key,
+                provider_label="LiteLLM",
+                env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+                param_hint="`api_key` to LiteLLMModel(...)",
+            )
+            completion_params["api_key"] = api_key
+        if self.base_url:
+            completion_params["api_base"] = self.base_url

         # Add schema if provided
         if schema:

@@ -141,18 +174,25 @@ class LiteLLMModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Union[str, Dict, Tuple[str, float]]:
+
         from litellm import acompletion

         completion_params = {
-            "model": self.
+            "model": self.name,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": self.temperature,
         }

         if self.api_key:
-
-
-
+            api_key = require_secret_api_key(
+                self.api_key,
+                provider_label="LiteLLM",
+                env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+                param_hint="`api_key` to LiteLLMModel(...)",
+            )
+            completion_params["api_key"] = api_key
+        if self.base_url:
+            completion_params["api_base"] = self.base_url

         # Add schema if provided
         if schema:

@@ -195,12 +235,18 @@ class LiteLLMModel(DeepEvalBaseLLM):
         from litellm import completion

         try:
+            api_key = require_secret_api_key(
+                self.api_key,
+                provider_label="LiteLLM",
+                env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+                param_hint="`api_key` to LiteLLMModel(...)",
+            )
             completion_params = {
-                "model": self.
+                "model": self.name,
                 "messages": [{"role": "user", "content": prompt}],
                 "temperature": self.temperature,
-                "api_key":
-                "api_base": self.
+                "api_key": api_key,
+                "api_base": self.base_url,
                 "logprobs": True,
                 "top_logprobs": top_logprobs,
             }

@@ -230,12 +276,18 @@ class LiteLLMModel(DeepEvalBaseLLM):
         from litellm import acompletion

         try:
+            api_key = require_secret_api_key(
+                self.api_key,
+                provider_label="LiteLLM",
+                env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+                param_hint="`api_key` to LiteLLMModel(...)",
+            )
             completion_params = {
-                "model": self.
+                "model": self.name,
                 "messages": [{"role": "user", "content": prompt}],
                 "temperature": self.temperature,
-                "api_key":
-                "api_base": self.
+                "api_key": api_key,
+                "api_base": self.base_url,
                 "logprobs": True,
                 "top_logprobs": top_logprobs,
             }

@@ -263,13 +315,19 @@ class LiteLLMModel(DeepEvalBaseLLM):
         from litellm import completion

         try:
+            api_key = require_secret_api_key(
+                self.api_key,
+                provider_label="LiteLLM",
+                env_var_name="LITELLM_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GOOGLE_API_KEY",
+                param_hint="`api_key` to LiteLLMModel(...)",
+            )
             completion_params = {
-                "model": self.
+                "model": self.name,
                 "messages": [{"role": "user", "content": prompt}],
                 "temperature": temperature,
                 "n": n,
-                "api_key":
-                "api_base": self.
+                "api_key": api_key,
+                "api_base": self.base_url,
             }
             completion_params.update(self.kwargs)

@@ -315,8 +373,8 @@ class LiteLLMModel(DeepEvalBaseLLM):
     def get_model_name(self) -> str:
         from litellm import get_llm_provider

-        provider = get_llm_provider(self.
-        return f"{self.
+        provider = get_llm_provider(self.name)
+        return f"{self.name} ({provider})"

     def load_model(self, async_mode: bool = False):
         """
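`normalize_kwargs_and_extract_aliases` is new in `deepeval/models/utils.py` and its body is not shown in this diff; from the call site, it evidently strips legacy keyword aliases (here `api_base`) out of `**kwargs` and hands them back separately so the constructor can re-map them onto the renamed `base_url` parameter, while the sanitized kwargs are what get forwarded to the client. A hypothetical re-implementation consistent with that call site (the warning behavior and internals are assumptions, not the package's actual code):

    import warnings
    from typing import Any, Dict, List, Tuple

    def normalize_kwargs_and_extract_aliases(
        owner: str,
        kwargs: Dict[str, Any],
        alias_map: Dict[str, List[str]],
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Split legacy alias kwargs (e.g. api_base) from the kwargs forwarded to the client."""
        normalized = dict(kwargs)
        alias_values: Dict[str, Any] = {}
        for canonical, aliases in alias_map.items():
            for alias in aliases:
                if alias in normalized:
                    warnings.warn(
                        f"{owner}: `{alias}` is deprecated, use `{canonical}` instead",
                        DeprecationWarning,
                    )
                    # first alias wins; later aliases are still stripped from kwargs
                    alias_values.setdefault(canonical, normalized.pop(alias))
        return normalized, alias_values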
deepeval/models/llms/local_model.py

@@ -1,15 +1,18 @@
 from typing import Optional, Tuple, Union, Dict
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr
 from openai import OpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletion

+from deepeval.config.settings import get_settings
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
 from deepeval.models.llms.utils import trim_and_load_json
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.constants import ProviderSlug as PS


@@ -21,31 +24,35 @@ class LocalModel(DeepEvalBaseLLM):
     def __init__(
         self,
         model: Optional[str] = None,
-        base_url: Optional[str] = None,
         api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
         temperature: float = 0,
         format: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-
-
-
-
-
-
-
-
-
-        self.
-
+        settings = get_settings()
+
+        model = model or settings.LOCAL_MODEL_NAME
+        if api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.local_model_api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.local_model_api_key = settings.LOCAL_MODEL_API_KEY
+
+        self.base_url = (
+            base_url
+            or settings.LOCAL_MODEL_BASE_URL
+            and str(settings.LOCAL_MODEL_BASE_URL)
         )
+        self.format = format or settings.LOCAL_MODEL_FORMAT
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
+        # Keep sanitized kwargs for client call to strip legacy keys
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(
+        super().__init__(model)

     ###############################################
     # Other generate functions

@@ -55,9 +62,10 @@ class LocalModel(DeepEvalBaseLLM):
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=False)
         response: ChatCompletion = client.chat.completions.create(
-            model=self.
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             temperature=self.temperature,
             **self.generation_kwargs,

@@ -74,9 +82,10 @@ class LocalModel(DeepEvalBaseLLM):
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, Dict], float]:
+
         client = self.load_model(async_mode=True)
         response: ChatCompletion = await client.chat.completions.create(
-            model=self.
+            model=self.name,
             messages=[{"role": "user", "content": prompt}],
             temperature=self.temperature,
             **self.generation_kwargs,

@@ -94,10 +103,7 @@ class LocalModel(DeepEvalBaseLLM):
     ###############################################

     def get_model_name(self):
-
-            ModelKeyValues.LOCAL_MODEL_NAME
-        )
-        return f"{model_name} (Local Model)"
+        return f"{self.name} (Local Model)"

     def load_model(self, async_mode: bool = False):
         if not async_mode:

@@ -115,8 +121,15 @@ class LocalModel(DeepEvalBaseLLM):
         return kwargs

     def _build_client(self, cls):
+        local_model_api_key = require_secret_api_key(
+            self.local_model_api_key,
+            provider_label="Local",
+            env_var_name="LOCAL_MODEL_API_KEY",
+            param_hint="`api_key` to LocalModel(...)",
+        )
+
         kw = dict(
-            api_key=
+            api_key=local_model_api_key,
             base_url=self.base_url,
             **self._client_kwargs(),
        )
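One subtlety in the new `LocalModel.base_url` expression: `and` binds tighter than `or`, so `base_url or settings.LOCAL_MODEL_BASE_URL and str(settings.LOCAL_MODEL_BASE_URL)` parses as `base_url or (setting and str(setting))`. An explicit argument wins; otherwise the setting is stringified when set, and the whole expression falls through to `None` when it is not. A quick check of that behavior, with plain values standing in for the settings object:

    def resolve_base_url(base_url, setting):
        # mirrors the operator precedence in LocalModel.__init__
        return base_url or setting and str(setting)

    assert resolve_base_url("http://explicit:8080", None) == "http://explicit:8080"
    assert resolve_base_url(None, "http://localhost:11434") == "http://localhost:11434"
    assert resolve_base_url(None, None) is None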