deepeval 3.7.3__py3-none-any.whl → 3.7.4__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- deepeval/_version.py +1 -1
- deepeval/cli/test.py +1 -1
- deepeval/config/settings.py +102 -13
- deepeval/evaluate/configs.py +1 -1
- deepeval/evaluate/execute.py +4 -1
- deepeval/metrics/answer_relevancy/template.py +4 -4
- deepeval/metrics/argument_correctness/template.py +2 -2
- deepeval/metrics/bias/template.py +3 -3
- deepeval/metrics/contextual_precision/template.py +6 -6
- deepeval/metrics/contextual_recall/template.py +2 -2
- deepeval/metrics/contextual_relevancy/template.py +3 -3
- deepeval/metrics/conversation_completeness/template.py +2 -2
- deepeval/metrics/conversational_dag/templates.py +4 -4
- deepeval/metrics/conversational_g_eval/template.py +4 -3
- deepeval/metrics/dag/templates.py +4 -4
- deepeval/metrics/faithfulness/template.py +4 -4
- deepeval/metrics/hallucination/template.py +4 -4
- deepeval/metrics/misuse/template.py +2 -2
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +7 -7
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +6 -6
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +2 -2
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +3 -3
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +9 -9
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +4 -4
- deepeval/metrics/non_advice/template.py +2 -2
- deepeval/metrics/pii_leakage/template.py +2 -2
- deepeval/metrics/prompt_alignment/template.py +4 -4
- deepeval/metrics/role_violation/template.py +2 -2
- deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
- deepeval/metrics/toxicity/template.py +4 -4
- deepeval/metrics/turn_relevancy/template.py +2 -2
- deepeval/models/embedding_models/azure_embedding_model.py +28 -15
- deepeval/models/embedding_models/local_embedding_model.py +23 -10
- deepeval/models/embedding_models/ollama_embedding_model.py +8 -6
- deepeval/models/embedding_models/openai_embedding_model.py +18 -2
- deepeval/models/llms/anthropic_model.py +17 -5
- deepeval/models/llms/azure_model.py +30 -18
- deepeval/models/llms/deepseek_model.py +22 -12
- deepeval/models/llms/gemini_model.py +120 -87
- deepeval/models/llms/grok_model.py +23 -16
- deepeval/models/llms/kimi_model.py +23 -12
- deepeval/models/llms/litellm_model.py +63 -25
- deepeval/models/llms/local_model.py +26 -18
- deepeval/models/llms/ollama_model.py +17 -7
- deepeval/models/llms/openai_model.py +22 -17
- deepeval/models/llms/portkey_model.py +132 -0
- deepeval/models/mlllms/azure_model.py +28 -19
- deepeval/models/mlllms/gemini_model.py +102 -73
- deepeval/models/mlllms/ollama_model.py +40 -9
- deepeval/models/mlllms/openai_model.py +65 -14
- deepeval/models/utils.py +48 -3
- deepeval/optimization/__init__.py +13 -0
- deepeval/optimization/adapters/__init__.py +2 -0
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +588 -0
- deepeval/optimization/aggregates.py +14 -0
- deepeval/optimization/configs.py +34 -0
- deepeval/optimization/copro/configs.py +31 -0
- deepeval/optimization/copro/loop.py +837 -0
- deepeval/optimization/gepa/__init__.py +7 -0
- deepeval/optimization/gepa/configs.py +115 -0
- deepeval/optimization/gepa/loop.py +677 -0
- deepeval/optimization/miprov2/configs.py +134 -0
- deepeval/optimization/miprov2/loop.py +785 -0
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +458 -0
- deepeval/optimization/policies/__init__.py +16 -0
- deepeval/optimization/policies/selection.py +166 -0
- deepeval/optimization/policies/tie_breaker.py +67 -0
- deepeval/optimization/prompt_optimizer.py +462 -0
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +33 -0
- deepeval/optimization/simba/loop.py +983 -0
- deepeval/optimization/simba/types.py +15 -0
- deepeval/optimization/types.py +361 -0
- deepeval/optimization/utils.py +598 -0
- deepeval/prompt/prompt.py +10 -5
- deepeval/test_run/cache.py +2 -0
- deepeval/test_run/test_run.py +6 -1
- deepeval/utils.py +24 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/METADATA +1 -1
- {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/RECORD +84 -59
- {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/WHEEL +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/ollama_model.py

@@ -2,12 +2,11 @@ from ollama import Client, AsyncClient, ChatResponse
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel

+from deepeval.config.settings import get_settings
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
-
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.constants import ProviderSlug as PS


@@ -23,17 +22,20 @@ class OllamaModel(DeepEvalBaseLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-
-
-        )
+        settings = get_settings()
+        model_name = model or settings.LOCAL_MODEL_NAME
         self.base_url = (
             base_url
-            or
+            or (
+                settings.LOCAL_MODEL_BASE_URL
+                and str(settings.LOCAL_MODEL_BASE_URL)
+            )
             or "http://localhost:11434"
         )
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
+        # Raw kwargs destined for the underlying Ollama client
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
         super().__init__(model_name)

@@ -97,8 +99,16 @@ class OllamaModel(DeepEvalBaseLLM):
             return self._build_client(Client)
         return self._build_client(AsyncClient)

+    def _client_kwargs(self) -> Dict:
+        """Return kwargs forwarded to the underlying Ollama Client/AsyncClient."""
+        return dict(self.kwargs or {})
+
     def _build_client(self, cls):
-
+        kw = dict(
+            host=self.base_url,
+            **self._client_kwargs(),
+        )
+        return cls(**kw)

     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
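For context, a minimal usage sketch of the updated OllamaModel constructor (not part of the diff): the base URL now falls back to LOCAL_MODEL_BASE_URL from settings before the localhost default, and extra keyword arguments are forwarded to the underlying ollama.Client through the new _client_kwargs() helper. The model tag and the timeout passthrough below are illustrative assumptions.

from deepeval.models.llms.ollama_model import OllamaModel

# Explicit values override settings; omit them to fall back to
# LOCAL_MODEL_NAME / LOCAL_MODEL_BASE_URL, then "http://localhost:11434".
model = OllamaModel(
    model="llama3.1",                  # assumed tag already pulled into Ollama
    base_url="http://localhost:11434",
    temperature=0.0,
    timeout=120,                       # passed through untouched to ollama.Client
)
print(model.get_model_name())          # "llama3.1 (Ollama)"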
deepeval/models/llms/openai_model.py

@@ -1,7 +1,6 @@
 from openai.types.chat.chat_completion import ChatCompletion
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from typing import Optional, Tuple, Union, Dict
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr

 from openai import (
     OpenAI,

@@ -12,7 +11,7 @@ from deepeval.config.settings import get_settings
 from deepeval.constants import ProviderSlug as PS
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import parse_model_name
+from deepeval.models.utils import parse_model_name, require_secret_api_key
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,

@@ -227,23 +226,18 @@ class GPTModel(DeepEvalBaseLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
+        settings = get_settings()
         model_name = None
-        model = model or
-            ModelKeyValues.OPENAI_MODEL_NAME
-        )
+        model = model or settings.OPENAI_MODEL_NAME
         cost_per_input_token = (
             cost_per_input_token
             if cost_per_input_token is not None
-            else
-                ModelKeyValues.OPENAI_COST_PER_INPUT_TOKEN
-            )
+            else settings.OPENAI_COST_PER_INPUT_TOKEN
         )
         cost_per_output_token = (
             cost_per_output_token
             if cost_per_output_token is not None
-            else
-                ModelKeyValues.OPENAI_COST_PER_OUTPUT_TOKEN
-            )
+            else settings.OPENAI_COST_PER_OUTPUT_TOKEN
         )

         if isinstance(model, str):

@@ -272,7 +266,12 @@ class GPTModel(DeepEvalBaseLLM):
         elif model is None:
             model_name = default_gpt_model

-
+        if _openai_api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self._openai_api_key: SecretStr | None = SecretStr(_openai_api_key)
+        else:
+            self._openai_api_key = get_settings().OPENAI_API_KEY
+
         self.base_url = base_url
         # args and kwargs will be passed to the underlying model, in load_model function


@@ -485,9 +484,9 @@ class GPTModel(DeepEvalBaseLLM):
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost

-
-    # Model
-
+    #########
+    # Model #
+    #########

     def get_model_name(self):
         return self.model_name

@@ -512,9 +511,15 @@ class GPTModel(DeepEvalBaseLLM):
         return kwargs

     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self._openai_api_key,
+            provider_label="OpenAI",
+            env_var_name="OPENAI_API_KEY",
+            param_hint="`_openai_api_key` to GPTModel(...)",
+        )

         kw = dict(
-            api_key=
+            api_key=api_key,
            base_url=self.base_url,
            **self._client_kwargs(),
        )
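A usage sketch for the reworked GPTModel key handling (illustrative, not part of the diff): an explicitly passed key is wrapped in a pydantic SecretStr, otherwise settings.OPENAI_API_KEY is used, and require_secret_api_key(...) only unwraps it when the OpenAI client is built. The model name and key value below are placeholders.

from deepeval.models.llms.openai_model import GPTModel

# Pass the key explicitly, or omit it to fall back to settings.OPENAI_API_KEY.
model = GPTModel(
    model="gpt-4o-mini",        # placeholder; falls back to settings.OPENAI_MODEL_NAME
    _openai_api_key="sk-...",   # stored as SecretStr, so it won't appear in repr/logs
)
# If neither the argument nor the setting is present, require_secret_api_key(...)
# raises when the client is constructed, pointing at OPENAI_API_KEY.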
deepeval/models/llms/portkey_model.py (new file)

@@ -0,0 +1,132 @@
+import aiohttp
+import requests
+from typing import Any, Dict, List, Optional, Union
+from pydantic import AnyUrl, SecretStr
+
+from deepeval.config.settings import get_settings
+from deepeval.models.utils import require_secret_api_key
+from deepeval.models import DeepEvalBaseLLM
+from deepeval.utils import require_param
+
+
+class PortkeyModel(DeepEvalBaseLLM):
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[AnyUrl] = None,
+        provider: Optional[str] = None,
+    ):
+        settings = get_settings()
+        model = model or settings.PORTKEY_MODEL_NAME
+
+        self.model = require_param(
+            model,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_MODEL_NAME",
+            param_hint="model",
+        )
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.PORTKEY_API_KEY
+
+        if base_url is not None:
+            base_url = str(base_url).rstrip("/")
+        elif settings.PORTKEY_BASE_URL is not None:
+            base_url = str(settings.PORTKEY_BASE_URL).rstrip("/")
+
+        self.base_url = require_param(
+            base_url,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_BASE_URL",
+            param_hint="base_url",
+        )
+
+        provider = provider or settings.PORTKEY_PROVIDER_NAME
+        self.provider = require_param(
+            provider,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_PROVIDER_NAME",
+            param_hint="provider",
+        )
+
+    def _headers(self) -> Dict[str, str]:
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_API_KEY",
+            param_hint="`api_key` to PortkeyModel(...)",
+        )
+
+        headers = {
+            "Content-Type": "application/json",
+            "x-portkey-api-key": api_key,
+        }
+        if self.provider:
+            headers["x-portkey-provider"] = self.provider
+        return headers
+
+    def _payload(self, prompt: str) -> Dict[str, Any]:
+        return {
+            "model": self.model,
+            "messages": [{"role": "user", "content": prompt}],
+        }
+
+    def _extract_content(self, data: Dict[str, Any]) -> str:
+        choices: Union[List[Dict[str, Any]], None] = data.get("choices")
+        if not choices:
+            raise ValueError("Portkey response did not include any choices.")
+        message = choices[0].get("message", {})
+        content: Union[str, List[Dict[str, Any]], None] = message.get("content")
+        if isinstance(content, str):
+            return content
+        if isinstance(content, list):
+            return "".join(part.get("text", "") for part in content)
+        return ""
+
+    def generate(self, prompt: str) -> str:
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                json=self._payload(prompt),
+                headers=self._headers(),
+                timeout=60,
+            )
+            response.raise_for_status()
+        except requests.HTTPError as error:
+            body: Union[str, Dict[str, Any]]
+            try:
+                body = response.json()
+            except Exception:
+                body = response.text
+            raise ValueError(
+                f"Portkey request failed with status {response.status_code}: {body}"
+            ) from error
+        except requests.RequestException as error:
+            raise ValueError(f"Portkey request failed: {error}") from error
+        return self._extract_content(response.json())
+
+    async def a_generate(self, prompt: str) -> str:
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{self.base_url}/chat/completions",
+                json=self._payload(prompt),
+                headers=self._headers(),
+                timeout=60,
+            ) as response:
+                if response.status >= 400:
+                    body = await response.text()
+                    raise ValueError(
+                        f"Portkey request failed with status {response.status}: {body}"
+                    )
+                data = await response.json()
+                return self._extract_content(data)
+
+    def get_model_name(self) -> str:
+        return f"Portkey ({self.model})"
+
+    def load_model(self):
+        return None
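Since PortkeyModel is a new provider in this release, here is a usage sketch based on the constructor above; the gateway URL, key, and model are placeholders, and each argument can instead come from the PORTKEY_MODEL_NAME, PORTKEY_API_KEY, PORTKEY_BASE_URL, and PORTKEY_PROVIDER_NAME settings.

from deepeval.models.llms.portkey_model import PortkeyModel

model = PortkeyModel(
    model="gpt-4o",                        # placeholder routed model
    api_key="pk-...",                      # wrapped in SecretStr internally
    base_url="https://api.portkey.ai/v1",  # placeholder gateway URL
    provider="openai",                     # sent as the x-portkey-provider header
)
print(model.generate("Reply with the single word: pong"))

generate() posts to {base_url}/chat/completions with requests, while a_generate() does the same asynchronously with aiohttp; both surface HTTP errors as ValueError with the response body included.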
deepeval/models/mlllms/azure_model.py

@@ -1,12 +1,12 @@
+import base64
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
 from typing import Optional, Tuple, Union, Dict, List
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr
 from io import BytesIO
-import base64

+from deepeval.config.settings import get_settings
 from deepeval.models import DeepEvalBaseMLLM
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.test_case import MLLMImage
 from deepeval.models.llms.openai_model import (
     structured_outputs_models,

@@ -19,7 +19,7 @@ from deepeval.models.retry_policy import (
 )

 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import parse_model_name
+from deepeval.models.utils import parse_model_name, require_secret_api_key
 from deepeval.constants import ProviderSlug as PS


@@ -38,23 +38,25 @@ class MultimodalAzureOpenAIMLLMModel(DeepEvalBaseMLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
+        settings = get_settings()
         # fetch Azure deployment parameters
-        model_name = model_name or
-
-
-
-
-
-
-
-
-
+        model_name = model_name or settings.AZURE_MODEL_NAME
+        self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
+        if azure_openai_api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.azure_openai_api_key: SecretStr | None = SecretStr(
+                azure_openai_api_key
+            )
+        else:
+            self.azure_openai_api_key = settings.AZURE_OPENAI_API_KEY
+
         self.openai_api_version = (
-            openai_api_version
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
+            openai_api_version or settings.OPENAI_API_VERSION
         )
-        self.azure_endpoint =
-
+        self.azure_endpoint = (
+            azure_endpoint
+            or settings.AZURE_OPENAI_ENDPOINT
+            and str(settings.AZURE_OPENAI_ENDPOINT)
         )
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")

@@ -317,8 +319,15 @@ class MultimodalAzureOpenAIMLLMModel(DeepEvalBaseMLLM):
         return kwargs

     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.azure_openai_api_key,
+            provider_label="AzureOpenAI",
+            env_var_name="AZURE_OPENAI_API_KEY",
+            param_hint="`azure_openai_api_key` to MultimodalAzureOpenAIMLLMModel(...)",
+        )
+
         kw = dict(
-            api_key=
+            api_key=api_key,
            api_version=self.openai_api_version,
            azure_endpoint=self.azure_endpoint,
            azure_deployment=self.deployment_name,
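A usage sketch for the updated multimodal Azure constructor (all values are placeholders): any argument left out falls back to the corresponding setting (AZURE_MODEL_NAME, AZURE_DEPLOYMENT_NAME, AZURE_OPENAI_API_KEY, OPENAI_API_VERSION, AZURE_OPENAI_ENDPOINT), and the key is only unwrapped by require_secret_api_key(...) when the Azure client is built.

from deepeval.models.mlllms.azure_model import MultimodalAzureOpenAIMLLMModel

model = MultimodalAzureOpenAIMLLMModel(
    model_name="gpt-4o",                                    # placeholder
    deployment_name="my-gpt4o-deployment",                  # placeholder
    azure_openai_api_key="...",                             # stored as SecretStr
    openai_api_version="2024-06-01",                        # placeholder API version
    azure_endpoint="https://my-resource.openai.azure.com",  # placeholder endpoint
)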
deepeval/models/mlllms/gemini_model.py

@@ -1,16 +1,16 @@
-from typing import Optional, List, Union
 import requests
-from
+from typing import Optional, List, Union
+from pydantic import BaseModel, SecretStr
 from google.genai import types
 from google import genai

+from deepeval.config.settings import get_settings
+from deepeval.models.utils import require_secret_api_key
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.base_model import DeepEvalBaseMLLM
 from deepeval.test_case import MLLMImage
-from deepeval.config.settings import get_settings
 from deepeval.constants import ProviderSlug as PS


@@ -60,77 +60,31 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
         *args,
         **kwargs,
     ):
+        settings = get_settings()
         model_name = (
             model_name
-            or
+            or settings.GEMINI_MODEL_NAME
             or default_multimodal_gemini_model
         )

-        # Get API key from
-
-
-
-        self.project = project or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_CLOUD_PROJECT
-        )
-        self.location = location or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_CLOUD_LOCATION
-        )
-        self.use_vertexai = KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_GENAI_USE_VERTEXAI
-        )
-
-        super().__init__(model_name, *args, **kwargs)
-        self.model = self.load_model(*args, **kwargs)
-
-    def should_use_vertexai(self):
-        """Checks if the model should use Vertex AI for generation.
-
-        This is determined first by the value of `GOOGLE_GENAI_USE_VERTEXAI`
-        environment variable. If not set, it checks for the presence of the
-        project and location.
-
-        Returns:
-            True if the model should use Vertex AI, False otherwise
-        """
-        if self.use_vertexai is not None:
-            return self.use_vertexai.lower() == "yes"
-
-        if self.project and self.location:
-            return True
+        # Get API key from settings if not provided
+        if api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and aolike
+            self.api_key: SecretStr | None = SecretStr(api_key)
         else:
-
-
-    def load_model(self, *args, **kwargs):
-        """Creates a client.
-        With Gen AI SDK, model is set at inference time, so there is no
-        model to load and initialize.
-        This method name is kept for compatibility with other LLMs.
-
-        Returns:
-            A GenerativeModel instance configured for evaluation.
-        """
-        if self.should_use_vertexai():
-            if not self.project or not self.location:
-                raise ValueError(
-                    "When using Vertex AI API, both project and location are required."
-                    "Either provide them as arguments or set GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables, "
-                    "or set them in your DeepEval configuration."
-                )
+            self.api_key = settings.GOOGLE_API_KEY

-
-
-
-
-
-
-
-                    "Google API key is required. Either provide it directly, set GOOGLE_API_KEY environment variable, "
-                    "or set it in your DeepEval configuration."
-                )
+        self.project = project or settings.GOOGLE_CLOUD_PROJECT
+        self.location = (
+            location
+            or settings.GOOGLE_CLOUD_LOCATION is not None
+            and str(settings.GOOGLE_CLOUD_LOCATION)
+        )
+        self.use_vertexai = settings.GOOGLE_GENAI_USE_VERTEXAI

-
-
+        # Keep any extra kwargs for the underlying genai.Client
+        self.args = args
+        self.kwargs = kwargs

         # Configure default model generation settings
         self.model_safety_settings = [

@@ -152,9 +106,28 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             ),
         ]
         self.model_temperature = 0.0
-        return self.client.models

-
+        super().__init__(model_name, *args, **kwargs)
+
+    def should_use_vertexai(self):
+        """Checks if the model should use Vertex AI for generation.
+
+        This is determined first by the value of `GOOGLE_GENAI_USE_VERTEXAI`
+        environment variable. If not set, it checks for the presence of the
+        project and location.
+
+        Returns:
+            True if the model should use Vertex AI, False otherwise
+        """
+        if self.use_vertexai is not None:
+            return self.use_vertexai.lower() == "yes"
+
+        if self.project and self.location:
+            return True
+        else:
+            return False
+
+    # TODO: Refactor generate prompt to minimize the work done on retry
     @retry_gemini
     def generate_prompt(
         self, multimodal_input: List[Union[str, MLLMImage]] = []

@@ -214,10 +187,11 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
         Returns:
             Generated text response
         """
+        client = self.load_model()
         prompt = self.generate_prompt(multimodal_input)

         if schema is not None:
-            response =
+            response = client.models.generate_content(
                 model=self.model_name,
                 contents=prompt,
                 config=types.GenerateContentConfig(

@@ -229,7 +203,7 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             )
             return response.parsed, 0
         else:
-            response =
+            response = client.models.generate_content(
                 model=self.model_name,
                 contents=prompt,
                 config=types.GenerateContentConfig(

@@ -254,10 +228,11 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
         Returns:
             Generated text response
         """
+        client = self.load_model()
         prompt = self.generate_prompt(multimodal_input)

         if schema is not None:
-            response = await
+            response = await client.aio.models.generate_content(
                 model=self.model_name,
                 contents=prompt,
                 config=types.GenerateContentConfig(

@@ -269,7 +244,7 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             )
             return response.parsed, 0
         else:
-            response = await
+            response = await client.aio.models.generate_content(
                 model=self.model_name,
                 contents=prompt,
                 config=types.GenerateContentConfig(

@@ -279,6 +254,60 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             )
             return response.text, 0

+    #########
+    # Model #
+    #########
+
     def get_model_name(self) -> str:
         """Returns the name of the Gemini model being used."""
         return self.model_name
+
+    def load_model(self, *args, **kwargs):
+        """Creates and returns a GenAI client.
+
+        With the Gen AI SDK, the model is set at inference time, so we only
+        construct the client here. Kept for compatibility with other MLLMs.
+        """
+        return self._build_client(**kwargs)
+
+    def _client_kwargs(self, **override_kwargs) -> dict:
+        """
+        Return kwargs forwarded to genai.Client.
+
+        Start from the ctor kwargs captured on `self.kwargs`, then apply any
+        overrides passed via load_model(...).
+        """
+        client_kwargs = dict(self.kwargs or {})
+        if override_kwargs:
+            client_kwargs.update(override_kwargs)
+        return client_kwargs
+
+    def _build_client(self, **override_kwargs):
+        """Build and return a genai.Client for either Gemini API or Vertex AI."""
+        client_kwargs = self._client_kwargs(**override_kwargs)
+
+        if self.should_use_vertexai():
+            if not self.project or not self.location:
+                raise ValueError(
+                    "When using Vertex AI API, both project and location are required."
+                    "Either provide them as arguments or set GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables, "
+                    "or set them in your DeepEval configuration."
+                )
+
+            # Create client for Vertex AI
+            return genai.Client(
+                vertexai=True,
+                project=self.project,
+                location=self.location,
+                **client_kwargs,
+            )
+
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="Google Gemini",
+            env_var_name="GOOGLE_API_KEY",
+            param_hint="`api_key` to MultimodalGeminiModel(...)",
+        )
+
+        # Create client for Gemini API
+        return genai.Client(api_key=api_key, **client_kwargs)
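The rebuilt _build_client supports two paths; a sketch of both follows (identifiers and values are placeholders): the plain Gemini API path resolves the key from the argument or settings.GOOGLE_API_KEY, while the Vertex AI path is chosen when GOOGLE_GENAI_USE_VERTEXAI resolves to "yes" or when both project and location are available.

from deepeval.models.mlllms.gemini_model import MultimodalGeminiModel

# Gemini API path: key from the argument (wrapped in SecretStr) or settings.GOOGLE_API_KEY.
gemini = MultimodalGeminiModel(
    model_name="gemini-1.5-flash",  # placeholder; falls back to settings.GEMINI_MODEL_NAME
    api_key="AIza...",              # placeholder key
)

# Vertex AI path: providing both project and location makes should_use_vertexai() return True.
vertex = MultimodalGeminiModel(
    model_name="gemini-1.5-flash",
    project="my-gcp-project",       # placeholder project
    location="us-central1",         # placeholder region
)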
deepeval/models/mlllms/ollama_model.py

@@ -8,7 +8,6 @@ import io
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
-from deepeval.key_handler import KEY_FILE_HANDLER, ModelKeyValues
 from deepeval.models import DeepEvalBaseMLLM
 from deepeval.test_case import MLLMImage
 from deepeval.config.settings import get_settings

@@ -19,14 +18,34 @@ retry_ollama = create_retry_decorator(PS.OLLAMA)


 class MultimodalOllamaModel(DeepEvalBaseMLLM):
-    def __init__(
-
-
-
-
-
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        host: Optional[str] = None,
+        **kwargs,
+    ):
+        """
+        Multimodal Ollama model.
+
+        - `model`: Ollama model name (e.g. "llava").
+        - `host`: Ollama base URL (e.g. "http://localhost:11434").
+        - extra **kwargs are passed through to the underlying Client.
+        """
+        settings = get_settings()
+
+        # Resolve host/base URL
+        self.base_url = (
+            host
+            or settings.LOCAL_MODEL_BASE_URL
+            and str(settings.LOCAL_MODEL_BASE_URL)
         )
-
+
+        # Resolve model name
+        model_name = model or settings.LOCAL_MODEL_NAME
+
+        # Client kwargs
+        self.kwargs = kwargs or {}
+
         super().__init__(model_name)

     @retry_ollama

@@ -132,13 +151,25 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
             print(f"Error converting image to base64: {e}")
             return None

+    ###############################################
+    # Model
+    ###############################################
+
     def load_model(self, async_mode: bool = False):
         if not async_mode:
             return self._build_client(Client)
         return self._build_client(AsyncClient)

+    def _client_kwargs(self) -> Dict:
+        """
+        Return client-init kwargs.
+        Ollama's Python client doesn't have built-in retry config like OpenAI,
+        so we just pass these through untouched.
+        """
+        return dict(self.kwargs or {})
+
     def _build_client(self, cls):
-        return cls(host=self.base_url, **self.
+        return cls(host=self.base_url, **self._client_kwargs())

     def get_model_name(self):
         return f"{self.model_name} (Ollama)"