deepeval 3.7.3__py3-none-any.whl → 3.7.4__py3-none-any.whl

This diff compares the contents of two package versions as published to one of the supported public registries. It is provided for informational purposes only and reflects the packages exactly as they appear in the registry.
Files changed (84)
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/test.py +1 -1
  3. deepeval/config/settings.py +102 -13
  4. deepeval/evaluate/configs.py +1 -1
  5. deepeval/evaluate/execute.py +4 -1
  6. deepeval/metrics/answer_relevancy/template.py +4 -4
  7. deepeval/metrics/argument_correctness/template.py +2 -2
  8. deepeval/metrics/bias/template.py +3 -3
  9. deepeval/metrics/contextual_precision/template.py +6 -6
  10. deepeval/metrics/contextual_recall/template.py +2 -2
  11. deepeval/metrics/contextual_relevancy/template.py +3 -3
  12. deepeval/metrics/conversation_completeness/template.py +2 -2
  13. deepeval/metrics/conversational_dag/templates.py +4 -4
  14. deepeval/metrics/conversational_g_eval/template.py +4 -3
  15. deepeval/metrics/dag/templates.py +4 -4
  16. deepeval/metrics/faithfulness/template.py +4 -4
  17. deepeval/metrics/hallucination/template.py +4 -4
  18. deepeval/metrics/misuse/template.py +2 -2
  19. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +7 -7
  20. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +6 -6
  21. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +2 -2
  22. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +3 -3
  23. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +9 -9
  24. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +4 -4
  25. deepeval/metrics/non_advice/template.py +2 -2
  26. deepeval/metrics/pii_leakage/template.py +2 -2
  27. deepeval/metrics/prompt_alignment/template.py +4 -4
  28. deepeval/metrics/role_violation/template.py +2 -2
  29. deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  30. deepeval/metrics/toxicity/template.py +4 -4
  31. deepeval/metrics/turn_relevancy/template.py +2 -2
  32. deepeval/models/embedding_models/azure_embedding_model.py +28 -15
  33. deepeval/models/embedding_models/local_embedding_model.py +23 -10
  34. deepeval/models/embedding_models/ollama_embedding_model.py +8 -6
  35. deepeval/models/embedding_models/openai_embedding_model.py +18 -2
  36. deepeval/models/llms/anthropic_model.py +17 -5
  37. deepeval/models/llms/azure_model.py +30 -18
  38. deepeval/models/llms/deepseek_model.py +22 -12
  39. deepeval/models/llms/gemini_model.py +120 -87
  40. deepeval/models/llms/grok_model.py +23 -16
  41. deepeval/models/llms/kimi_model.py +23 -12
  42. deepeval/models/llms/litellm_model.py +63 -25
  43. deepeval/models/llms/local_model.py +26 -18
  44. deepeval/models/llms/ollama_model.py +17 -7
  45. deepeval/models/llms/openai_model.py +22 -17
  46. deepeval/models/llms/portkey_model.py +132 -0
  47. deepeval/models/mlllms/azure_model.py +28 -19
  48. deepeval/models/mlllms/gemini_model.py +102 -73
  49. deepeval/models/mlllms/ollama_model.py +40 -9
  50. deepeval/models/mlllms/openai_model.py +65 -14
  51. deepeval/models/utils.py +48 -3
  52. deepeval/optimization/__init__.py +13 -0
  53. deepeval/optimization/adapters/__init__.py +2 -0
  54. deepeval/optimization/adapters/deepeval_scoring_adapter.py +588 -0
  55. deepeval/optimization/aggregates.py +14 -0
  56. deepeval/optimization/configs.py +34 -0
  57. deepeval/optimization/copro/configs.py +31 -0
  58. deepeval/optimization/copro/loop.py +837 -0
  59. deepeval/optimization/gepa/__init__.py +7 -0
  60. deepeval/optimization/gepa/configs.py +115 -0
  61. deepeval/optimization/gepa/loop.py +677 -0
  62. deepeval/optimization/miprov2/configs.py +134 -0
  63. deepeval/optimization/miprov2/loop.py +785 -0
  64. deepeval/optimization/mutations/__init__.py +0 -0
  65. deepeval/optimization/mutations/prompt_rewriter.py +458 -0
  66. deepeval/optimization/policies/__init__.py +16 -0
  67. deepeval/optimization/policies/selection.py +166 -0
  68. deepeval/optimization/policies/tie_breaker.py +67 -0
  69. deepeval/optimization/prompt_optimizer.py +462 -0
  70. deepeval/optimization/simba/__init__.py +0 -0
  71. deepeval/optimization/simba/configs.py +33 -0
  72. deepeval/optimization/simba/loop.py +983 -0
  73. deepeval/optimization/simba/types.py +15 -0
  74. deepeval/optimization/types.py +361 -0
  75. deepeval/optimization/utils.py +598 -0
  76. deepeval/prompt/prompt.py +10 -5
  77. deepeval/test_run/cache.py +2 -0
  78. deepeval/test_run/test_run.py +6 -1
  79. deepeval/utils.py +24 -0
  80. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/METADATA +1 -1
  81. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/RECORD +84 -59
  82. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/LICENSE.md +0 -0
  83. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/WHEEL +0 -0
  84. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/ollama_model.py (+17 -7)

@@ -2,12 +2,11 @@ from ollama import Client, AsyncClient, ChatResponse
 from typing import Optional, Tuple, Union, Dict
 from pydantic import BaseModel

+from deepeval.config.settings import get_settings
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
-
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.constants import ProviderSlug as PS

@@ -23,17 +22,20 @@ class OllamaModel(DeepEvalBaseLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
-        model_name = model or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_NAME
-        )
+        settings = get_settings()
+        model_name = model or settings.LOCAL_MODEL_NAME
         self.base_url = (
             base_url
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.LOCAL_MODEL_BASE_URL)
+            or (
+                settings.LOCAL_MODEL_BASE_URL
+                and str(settings.LOCAL_MODEL_BASE_URL)
+            )
             or "http://localhost:11434"
         )
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
         self.temperature = temperature
+        # Raw kwargs destined for the underlying Ollama client
         self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
         super().__init__(model_name)
@@ -97,8 +99,16 @@ class OllamaModel(DeepEvalBaseLLM):
             return self._build_client(Client)
         return self._build_client(AsyncClient)

+    def _client_kwargs(self) -> Dict:
+        """Return kwargs forwarded to the underlying Ollama Client/AsyncClient."""
+        return dict(self.kwargs or {})
+
     def _build_client(self, cls):
-        return cls(host=self.base_url, **self.kwargs)
+        kw = dict(
+            host=self.base_url,
+            **self._client_kwargs(),
+        )
+        return cls(**kw)

     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
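
In practice the constructor now reads its defaults from the typed settings object instead of the JSON key file, with the same precedence as before: explicit argument, then LOCAL_MODEL_NAME / LOCAL_MODEL_BASE_URL, then the localhost fallback. A usage sketch (the model name here is just an example):

from deepeval.models.llms.ollama_model import OllamaModel

# Explicit arguments win; with neither arguments nor settings, the host
# falls back to "http://localhost:11434".
model = OllamaModel(model="llama3", base_url="http://localhost:11434")
print(model.get_model_name())  # llama3 (Ollama)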

deepeval/models/llms/openai_model.py (+22 -17)

@@ -1,7 +1,6 @@
 from openai.types.chat.chat_completion import ChatCompletion
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from typing import Optional, Tuple, Union, Dict
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr

 from openai import (
     OpenAI,
@@ -12,7 +11,7 @@ from deepeval.config.settings import get_settings
 from deepeval.constants import ProviderSlug as PS
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import parse_model_name
+from deepeval.models.utils import parse_model_name, require_secret_api_key
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
@@ -227,23 +226,18 @@ class GPTModel(DeepEvalBaseLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
+        settings = get_settings()
         model_name = None
-        model = model or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.OPENAI_MODEL_NAME
-        )
+        model = model or settings.OPENAI_MODEL_NAME
         cost_per_input_token = (
             cost_per_input_token
             if cost_per_input_token is not None
-            else KEY_FILE_HANDLER.fetch_data(
-                ModelKeyValues.OPENAI_COST_PER_INPUT_TOKEN
-            )
+            else settings.OPENAI_COST_PER_INPUT_TOKEN
         )
         cost_per_output_token = (
             cost_per_output_token
             if cost_per_output_token is not None
-            else KEY_FILE_HANDLER.fetch_data(
-                ModelKeyValues.OPENAI_COST_PER_OUTPUT_TOKEN
-            )
+            else settings.OPENAI_COST_PER_OUTPUT_TOKEN
         )

         if isinstance(model, str):
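
The hunk above shows the resolution pattern used throughout this release: an explicit argument wins, otherwise the value comes from the typed settings (here OPENAI_COST_PER_INPUT_TOKEN / OPENAI_COST_PER_OUTPUT_TOKEN). A sketch of how that surfaces to callers; the dollar figures are made up, not real rates:

from deepeval.models.llms.openai_model import GPTModel

# Explicit per-token costs override the settings-derived defaults.
model = GPTModel(
    model="gpt-4o",               # example model name
    cost_per_input_token=2.5e-6,  # illustrative price
    cost_per_output_token=1e-5,   # illustrative price
)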
@@ -272,7 +266,12 @@ class GPTModel(DeepEvalBaseLLM):
         elif model is None:
             model_name = default_gpt_model

-        self._openai_api_key = _openai_api_key
+        if _openai_api_key is not None:
+            # keep it secret, keep it safe from serialization, logging and the like
+            self._openai_api_key: SecretStr | None = SecretStr(_openai_api_key)
+        else:
+            self._openai_api_key = get_settings().OPENAI_API_KEY
+
         self.base_url = base_url
         # args and kwargs will be passed to the underlying model, in load_model function
@@ -485,9 +484,9 @@ class GPTModel(DeepEvalBaseLLM):
         output_cost = output_tokens * pricing["output"]
         return input_cost + output_cost

-    ###############################################
-    # Model
-    ###############################################
+    #########
+    # Model #
+    #########

     def get_model_name(self):
         return self.model_name
@@ -512,9 +511,15 @@ class GPTModel(DeepEvalBaseLLM):
         return kwargs

     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self._openai_api_key,
+            provider_label="OpenAI",
+            env_var_name="OPENAI_API_KEY",
+            param_hint="`_openai_api_key` to GPTModel(...)",
+        )

         kw = dict(
-            api_key=self._openai_api_key,
+            api_key=api_key,
             base_url=self.base_url,
             **self._client_kwargs(),
         )
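
The net effect of the SecretStr change is that a key passed in (or read from settings) will not leak through repr, logging, or serialization; it is unwrapped only at client-build time. The helper body below is a hypothetical re-implementation inferred from how require_secret_api_key is called in this diff, not the actual code in deepeval/models/utils.py:

from typing import Optional
from pydantic import SecretStr

def require_secret_api_key(
    key: Optional[SecretStr],
    *,
    provider_label: str,
    env_var_name: str,
    param_hint: str,
) -> str:
    # Hypothetical sketch: unwrap the SecretStr at client-build time,
    # failing with an actionable message when no key is configured.
    if key is None:
        raise ValueError(
            f"{provider_label} API key is missing. Set {env_var_name} "
            f"or pass {param_hint}."
        )
    return key.get_secret_value()

secret = SecretStr("sk-example")
print(secret)                     # **********  (safe to log or serialize)
print(secret.get_secret_value())  # sk-example  (only where actually needed)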

deepeval/models/llms/portkey_model.py (new file, +132 -0)

@@ -0,0 +1,132 @@
+import aiohttp
+import requests
+from typing import Any, Dict, List, Optional, Union
+from pydantic import AnyUrl, SecretStr
+
+from deepeval.config.settings import get_settings
+from deepeval.models.utils import require_secret_api_key
+from deepeval.models import DeepEvalBaseLLM
+from deepeval.utils import require_param
+
+
+class PortkeyModel(DeepEvalBaseLLM):
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[AnyUrl] = None,
+        provider: Optional[str] = None,
+    ):
+        settings = get_settings()
+        model = model or settings.PORTKEY_MODEL_NAME
+
+        self.model = require_param(
+            model,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_MODEL_NAME",
+            param_hint="model",
+        )
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serialization, logging and the like
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.PORTKEY_API_KEY
+
+        if base_url is not None:
+            base_url = str(base_url).rstrip("/")
+        elif settings.PORTKEY_BASE_URL is not None:
+            base_url = str(settings.PORTKEY_BASE_URL).rstrip("/")
+
+        self.base_url = require_param(
+            base_url,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_BASE_URL",
+            param_hint="base_url",
+        )
+
+        provider = provider or settings.PORTKEY_PROVIDER_NAME
+        self.provider = require_param(
+            provider,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_PROVIDER_NAME",
+            param_hint="provider",
+        )
+
+    def _headers(self) -> Dict[str, str]:
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_API_KEY",
+            param_hint="`api_key` to PortkeyModel(...)",
+        )
+
+        headers = {
+            "Content-Type": "application/json",
+            "x-portkey-api-key": api_key,
+        }
+        if self.provider:
+            headers["x-portkey-provider"] = self.provider
+        return headers
+
+    def _payload(self, prompt: str) -> Dict[str, Any]:
+        return {
+            "model": self.model,
+            "messages": [{"role": "user", "content": prompt}],
+        }
+
+    def _extract_content(self, data: Dict[str, Any]) -> str:
+        choices: Union[List[Dict[str, Any]], None] = data.get("choices")
+        if not choices:
+            raise ValueError("Portkey response did not include any choices.")
+        message = choices[0].get("message", {})
+        content: Union[str, List[Dict[str, Any]], None] = message.get("content")
+        if isinstance(content, str):
+            return content
+        if isinstance(content, list):
+            return "".join(part.get("text", "") for part in content)
+        return ""
+
+    def generate(self, prompt: str) -> str:
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                json=self._payload(prompt),
+                headers=self._headers(),
+                timeout=60,
+            )
+            response.raise_for_status()
+        except requests.HTTPError as error:
+            body: Union[str, Dict[str, Any]]
+            try:
+                body = response.json()
+            except Exception:
+                body = response.text
+            raise ValueError(
+                f"Portkey request failed with status {response.status_code}: {body}"
+            ) from error
+        except requests.RequestException as error:
+            raise ValueError(f"Portkey request failed: {error}") from error
+        return self._extract_content(response.json())
+
+    async def a_generate(self, prompt: str) -> str:
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{self.base_url}/chat/completions",
+                json=self._payload(prompt),
+                headers=self._headers(),
+                timeout=60,
+            ) as response:
+                if response.status >= 400:
+                    body = await response.text()
+                    raise ValueError(
+                        f"Portkey request failed with status {response.status}: {body}"
+                    )
+                data = await response.json()
+                return self._extract_content(data)
+
+    def get_model_name(self) -> str:
+        return f"Portkey ({self.model})"
+
+    def load_model(self):
+        return None
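
A usage sketch for the new wrapper: model, base_url, and provider are all required, and each can also resolve from the corresponding PORTKEY_* setting. The values below are placeholders, not verified Portkey endpoints or slugs:

from deepeval.models.llms.portkey_model import PortkeyModel

model = PortkeyModel(
    model="gpt-4o",                        # example model slug
    api_key="pk-example",                  # stored as SecretStr internally
    base_url="https://api.portkey.ai/v1",  # placeholder gateway URL
    provider="openai",
)
print(model.generate("Say hello."))        # sync; a_generate is the async twin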

deepeval/models/mlllms/azure_model.py (+28 -19)

@@ -1,12 +1,12 @@
+import base64
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
 from typing import Optional, Tuple, Union, Dict, List
-from pydantic import BaseModel
+from pydantic import BaseModel, SecretStr
 from io import BytesIO
-import base64

+from deepeval.config.settings import get_settings
 from deepeval.models import DeepEvalBaseMLLM
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.test_case import MLLMImage
 from deepeval.models.llms.openai_model import (
     structured_outputs_models,
@@ -19,7 +19,7 @@ from deepeval.models.retry_policy import (
 )

 from deepeval.models.llms.utils import trim_and_load_json
-from deepeval.models.utils import parse_model_name
+from deepeval.models.utils import parse_model_name, require_secret_api_key
 from deepeval.constants import ProviderSlug as PS

@@ -38,23 +38,25 @@ class MultimodalAzureOpenAIMLLMModel(DeepEvalBaseMLLM):
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
+        settings = get_settings()
         # fetch Azure deployment parameters
-        model_name = model_name or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.AZURE_MODEL_NAME
-        )
-        self.deployment_name = deployment_name or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.AZURE_DEPLOYMENT_NAME
-        )
-        self.azure_openai_api_key = (
-            azure_openai_api_key
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.AZURE_OPENAI_API_KEY)
-        )
+        model_name = model_name or settings.AZURE_MODEL_NAME
+        self.deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
+        if azure_openai_api_key is not None:
+            # keep it secret, keep it safe from serialization, logging and the like
+            self.azure_openai_api_key: SecretStr | None = SecretStr(
+                azure_openai_api_key
+            )
+        else:
+            self.azure_openai_api_key = settings.AZURE_OPENAI_API_KEY
+
         self.openai_api_version = (
-            openai_api_version
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
+            openai_api_version or settings.OPENAI_API_VERSION
         )
-        self.azure_endpoint = azure_endpoint or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.AZURE_OPENAI_ENDPOINT
+        self.azure_endpoint = (
+            azure_endpoint
+            or settings.AZURE_OPENAI_ENDPOINT
+            and str(settings.AZURE_OPENAI_ENDPOINT)
         )
         if temperature < 0:
             raise ValueError("Temperature must be >= 0.")
@@ -317,8 +319,15 @@ class MultimodalAzureOpenAIMLLMModel(DeepEvalBaseMLLM):
         return kwargs

     def _build_client(self, cls):
+        api_key = require_secret_api_key(
+            self.azure_openai_api_key,
+            provider_label="AzureOpenAI",
+            env_var_name="AZURE_OPENAI_API_KEY",
+            param_hint="`azure_openai_api_key` to MultimodalAzureOpenAIMLLMModel(...)",
+        )
+
         kw = dict(
-            api_key=self.azure_openai_api_key,
+            api_key=api_key,
             api_version=self.openai_api_version,
             azure_endpoint=self.azure_endpoint,
             azure_deployment=self.deployment_name,
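
One subtlety in the endpoint resolution above: because and binds tighter than or, the expression coerces the settings value to str only when it is actually set. A minimal sketch of the precedence, independent of deepeval:

azure_endpoint = None
setting = None  # stand-in for settings.AZURE_OPENAI_ENDPOINT (an AnyUrl or None)

# Parses as: azure_endpoint or (setting and str(setting))
resolved = azure_endpoint or setting and str(setting)
print(resolved)  # None - str() never runs while the setting is unset

setting = "https://example.openai.azure.com"  # hypothetical endpoint value
resolved = azure_endpoint or setting and str(setting)
print(resolved)  # https://example.openai.azure.com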

deepeval/models/mlllms/gemini_model.py (+102 -73)

@@ -1,16 +1,16 @@
-from typing import Optional, List, Union
 import requests
-from pydantic import BaseModel
+from typing import Optional, List, Union
+from pydantic import BaseModel, SecretStr
 from google.genai import types
 from google import genai

+from deepeval.config.settings import get_settings
+from deepeval.models.utils import require_secret_api_key
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
-from deepeval.key_handler import ModelKeyValues, KEY_FILE_HANDLER
 from deepeval.models.base_model import DeepEvalBaseMLLM
 from deepeval.test_case import MLLMImage
-from deepeval.config.settings import get_settings
 from deepeval.constants import ProviderSlug as PS

@@ -60,77 +60,31 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
         *args,
         **kwargs,
     ):
+        settings = get_settings()
         model_name = (
             model_name
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.GEMINI_MODEL_NAME)
+            or settings.GEMINI_MODEL_NAME
             or default_multimodal_gemini_model
         )

-        # Get API key from key handler if not provided
-        self.api_key = api_key or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_API_KEY
-        )
-        self.project = project or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_CLOUD_PROJECT
-        )
-        self.location = location or KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_CLOUD_LOCATION
-        )
-        self.use_vertexai = KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.GOOGLE_GENAI_USE_VERTEXAI
-        )
-
-        super().__init__(model_name, *args, **kwargs)
-        self.model = self.load_model(*args, **kwargs)
-
-    def should_use_vertexai(self):
-        """Checks if the model should use Vertex AI for generation.
-
-        This is determined first by the value of `GOOGLE_GENAI_USE_VERTEXAI`
-        environment variable. If not set, it checks for the presence of the
-        project and location.
-
-        Returns:
-            True if the model should use Vertex AI, False otherwise
-        """
-        if self.use_vertexai is not None:
-            return self.use_vertexai.lower() == "yes"
-
-        if self.project and self.location:
-            return True
+        # Get API key from settings if not provided
+        if api_key is not None:
+            # keep it secret, keep it safe from serialization, logging and the like
+            self.api_key: SecretStr | None = SecretStr(api_key)
         else:
-            return False
-
-    def load_model(self, *args, **kwargs):
-        """Creates a client.
-        With Gen AI SDK, model is set at inference time, so there is no
-        model to load and initialize.
-        This method name is kept for compatibility with other LLMs.
-
-        Returns:
-            A GenerativeModel instance configured for evaluation.
-        """
-        if self.should_use_vertexai():
-            if not self.project or not self.location:
-                raise ValueError(
-                    "When using Vertex AI API, both project and location are required."
-                    "Either provide them as arguments or set GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables, "
-                    "or set them in your DeepEval configuration."
-                )
+            self.api_key = settings.GOOGLE_API_KEY

-            # Create client for Vertex AI
-            self.client = genai.Client(
-                vertexai=True, project=self.project, location=self.location
-            )
-        else:
-            if not self.api_key:
-                raise ValueError(
-                    "Google API key is required. Either provide it directly, set GOOGLE_API_KEY environment variable, "
-                    "or set it in your DeepEval configuration."
-                )
+        self.project = project or settings.GOOGLE_CLOUD_PROJECT
+        self.location = (
+            location
+            or settings.GOOGLE_CLOUD_LOCATION is not None
+            and str(settings.GOOGLE_CLOUD_LOCATION)
+        )
+        self.use_vertexai = settings.GOOGLE_GENAI_USE_VERTEXAI

-            # Create client for Gemini API
-            self.client = genai.Client(api_key=self.api_key)
+        # Keep any extra kwargs for the underlying genai.Client
+        self.args = args
+        self.kwargs = kwargs

         # Configure default model generation settings
         self.model_safety_settings = [
@@ -152,9 +106,28 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             ),
         ]
         self.model_temperature = 0.0
-        return self.client.models

-    # TODO: Refactor genete prompt to minimize the work done on retry
+        super().__init__(model_name, *args, **kwargs)
+
+    def should_use_vertexai(self):
+        """Checks if the model should use Vertex AI for generation.
+
+        This is determined first by the value of `GOOGLE_GENAI_USE_VERTEXAI`
+        environment variable. If not set, it checks for the presence of the
+        project and location.
+
+        Returns:
+            True if the model should use Vertex AI, False otherwise
+        """
+        if self.use_vertexai is not None:
+            return self.use_vertexai.lower() == "yes"
+
+        if self.project and self.location:
+            return True
+        else:
+            return False
+
+    # TODO: Refactor generate prompt to minimize the work done on retry
     @retry_gemini
     def generate_prompt(
         self, multimodal_input: List[Union[str, MLLMImage]] = []
@@ -214,10 +187,11 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
         Returns:
             Generated text response
         """
+        client = self.load_model()
         prompt = self.generate_prompt(multimodal_input)

         if schema is not None:
-            response = self.client.models.generate_content(
+            response = client.models.generate_content(
                 model=self.model_name,
                 contents=prompt,
                 config=types.GenerateContentConfig(
@@ -229,7 +203,7 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             )
             return response.parsed, 0
         else:
-            response = self.client.models.generate_content(
+            response = client.models.generate_content(
                 model=self.model_name,
                 contents=prompt,
                 config=types.GenerateContentConfig(
@@ -254,10 +228,11 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
         Returns:
             Generated text response
         """
+        client = self.load_model()
         prompt = self.generate_prompt(multimodal_input)

         if schema is not None:
-            response = await self.client.aio.models.generate_content(
+            response = await client.aio.models.generate_content(
                 model=self.model_name,
                 contents=prompt,
                 config=types.GenerateContentConfig(
@@ -269,7 +244,7 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             )
             return response.parsed, 0
         else:
-            response = await self.client.aio.models.generate_content(
+            response = await client.aio.models.generate_content(
                 model=self.model_name,
                 contents=prompt,
                 config=types.GenerateContentConfig(
@@ -279,6 +254,60 @@ class MultimodalGeminiModel(DeepEvalBaseMLLM):
             )
             return response.text, 0

+    #########
+    # Model #
+    #########
+
     def get_model_name(self) -> str:
         """Returns the name of the Gemini model being used."""
         return self.model_name
+
+    def load_model(self, *args, **kwargs):
+        """Creates and returns a GenAI client.
+
+        With the Gen AI SDK, the model is set at inference time, so we only
+        construct the client here. Kept for compatibility with other MLLMs.
+        """
+        return self._build_client(**kwargs)
+
+    def _client_kwargs(self, **override_kwargs) -> dict:
+        """
+        Return kwargs forwarded to genai.Client.
+
+        Start from the ctor kwargs captured on `self.kwargs`, then apply any
+        overrides passed via load_model(...).
+        """
+        client_kwargs = dict(self.kwargs or {})
+        if override_kwargs:
+            client_kwargs.update(override_kwargs)
+        return client_kwargs
+
+    def _build_client(self, **override_kwargs):
+        """Build and return a genai.Client for either Gemini API or Vertex AI."""
+        client_kwargs = self._client_kwargs(**override_kwargs)
+
+        if self.should_use_vertexai():
+            if not self.project or not self.location:
+                raise ValueError(
+                    "When using Vertex AI API, both project and location are required."
+                    "Either provide them as arguments or set GOOGLE_CLOUD_PROJECT and GOOGLE_CLOUD_LOCATION environment variables, "
+                    "or set them in your DeepEval configuration."
+                )
+
+            # Create client for Vertex AI
+            return genai.Client(
+                vertexai=True,
+                project=self.project,
+                location=self.location,
+                **client_kwargs,
+            )
+
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="Google Gemini",
+            env_var_name="GOOGLE_API_KEY",
+            param_hint="`api_key` to MultimodalGeminiModel(...)",
+        )
+
+        # Create client for Gemini API
+        return genai.Client(api_key=api_key, **client_kwargs)
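
The practical effect of this refactor is that __init__ no longer touches the network: a client is built lazily on each load_model() call, routed to Vertex AI when GOOGLE_GENAI_USE_VERTEXAI is "yes" (or when both project and location are set) and to the Gemini API otherwise. A hedged usage sketch; the model name and key are placeholders:

from deepeval.models.mlllms.gemini_model import MultimodalGeminiModel

# Gemini API path: an explicit key is wrapped in SecretStr and only
# unwrapped inside _build_client via require_secret_api_key.
model = MultimodalGeminiModel(
    model_name="gemini-1.5-pro",  # example model name
    api_key="AIza-example",       # placeholder key
)
client = model.load_model()  # builds a fresh genai.Client each call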

deepeval/models/mlllms/ollama_model.py (+40 -9)

@@ -8,7 +8,6 @@ import io
 from deepeval.models.retry_policy import (
     create_retry_decorator,
 )
-from deepeval.key_handler import KEY_FILE_HANDLER, ModelKeyValues
 from deepeval.models import DeepEvalBaseMLLM
 from deepeval.test_case import MLLMImage
 from deepeval.config.settings import get_settings
@@ -19,14 +18,34 @@ retry_ollama = create_retry_decorator(PS.OLLAMA)


 class MultimodalOllamaModel(DeepEvalBaseMLLM):
-    def __init__(self, **kwargs):
-        model_name = KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_NAME
-        )
-        self.base_url = KEY_FILE_HANDLER.fetch_data(
-            ModelKeyValues.LOCAL_MODEL_BASE_URL
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        host: Optional[str] = None,
+        **kwargs,
+    ):
+        """
+        Multimodal Ollama model.
+
+        - `model`: Ollama model name (e.g. "llava").
+        - `host`: Ollama base URL (e.g. "http://localhost:11434").
+        - extra **kwargs are passed through to the underlying Client.
+        """
+        settings = get_settings()
+
+        # Resolve host/base URL
+        self.base_url = (
+            host
+            or settings.LOCAL_MODEL_BASE_URL
+            and str(settings.LOCAL_MODEL_BASE_URL)
         )
-        self.kwargs = kwargs
+
+        # Resolve model name
+        model_name = model or settings.LOCAL_MODEL_NAME
+
+        # Client kwargs
+        self.kwargs = kwargs or {}
+
         super().__init__(model_name)

     @retry_ollama
@@ -132,13 +151,25 @@ class MultimodalOllamaModel(DeepEvalBaseMLLM):
             print(f"Error converting image to base64: {e}")
         return None

+    ###############################################
+    # Model
+    ###############################################
+
     def load_model(self, async_mode: bool = False):
         if not async_mode:
             return self._build_client(Client)
         return self._build_client(AsyncClient)

+    def _client_kwargs(self) -> Dict:
+        """
+        Return client-init kwargs.
+        Ollama's Python client doesn't have built-in retry config like OpenAI,
+        so we just pass these through untouched.
+        """
+        return dict(self.kwargs or {})
+
     def _build_client(self, cls):
-        return cls(host=self.base_url, **self.kwargs)
+        return cls(host=self.base_url, **self._client_kwargs())

     def get_model_name(self):
         return f"{self.model_name} (Ollama)"
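
With the new signature, the model and host can finally be passed explicitly instead of coming only from settings. A sketch (the model name is an example):

from deepeval.models.mlllms.ollama_model import MultimodalOllamaModel

# Explicit arguments take precedence over LOCAL_MODEL_NAME /
# LOCAL_MODEL_BASE_URL; extra kwargs flow into the ollama Client.
model = MultimodalOllamaModel(model="llava", host="http://localhost:11434")
print(model.get_model_name())  # llava (Ollama)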