deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/test.py +1 -1
- deepeval/config/settings.py +102 -13
- deepeval/dataset/golden.py +54 -2
- deepeval/evaluate/configs.py +1 -1
- deepeval/evaluate/evaluate.py +16 -8
- deepeval/evaluate/execute.py +74 -27
- deepeval/evaluate/utils.py +26 -22
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/metrics/__init__.py +14 -12
- deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
- deepeval/metrics/answer_relevancy/template.py +188 -92
- deepeval/metrics/argument_correctness/template.py +2 -2
- deepeval/metrics/base_metric.py +2 -5
- deepeval/metrics/bias/template.py +3 -3
- deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/template.py +2 -2
- deepeval/metrics/conversational_dag/templates.py +4 -4
- deepeval/metrics/conversational_g_eval/template.py +4 -3
- deepeval/metrics/dag/templates.py +5 -5
- deepeval/metrics/faithfulness/faithfulness.py +70 -27
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/utils.py +2 -2
- deepeval/metrics/hallucination/template.py +4 -4
- deepeval/metrics/indicator.py +4 -4
- deepeval/metrics/misuse/template.py +2 -2
- deepeval/metrics/multimodal_metrics/__init__.py +0 -18
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
- deepeval/metrics/non_advice/template.py +2 -2
- deepeval/metrics/pii_leakage/template.py +2 -2
- deepeval/metrics/prompt_alignment/template.py +4 -4
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_violation/template.py +2 -2
- deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
- deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
- deepeval/metrics/toxicity/template.py +4 -4
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
- deepeval/metrics/turn_relevancy/template.py +2 -2
- deepeval/metrics/utils.py +39 -58
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +16 -38
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +69 -32
- deepeval/models/embedding_models/local_embedding_model.py +39 -22
- deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
- deepeval/models/embedding_models/openai_embedding_model.py +50 -15
- deepeval/models/llms/amazon_bedrock_model.py +1 -2
- deepeval/models/llms/anthropic_model.py +53 -20
- deepeval/models/llms/azure_model.py +140 -43
- deepeval/models/llms/deepseek_model.py +38 -23
- deepeval/models/llms/gemini_model.py +222 -103
- deepeval/models/llms/grok_model.py +39 -27
- deepeval/models/llms/kimi_model.py +39 -23
- deepeval/models/llms/litellm_model.py +103 -45
- deepeval/models/llms/local_model.py +35 -22
- deepeval/models/llms/ollama_model.py +129 -17
- deepeval/models/llms/openai_model.py +151 -50
- deepeval/models/llms/portkey_model.py +149 -0
- deepeval/models/llms/utils.py +5 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +94 -4
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/optimizer/algorithms/copro/copro.py +836 -0
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/optimizer/algorithms/simba/simba.py +999 -0
- deepeval/optimizer/algorithms/simba/types.py +15 -0
- deepeval/optimizer/configs.py +31 -0
- deepeval/optimizer/policies.py +227 -0
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/optimizer/utils.py +480 -0
- deepeval/prompt/prompt.py +7 -6
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +12 -10
- deepeval/test_case/conversational_test_case.py +19 -1
- deepeval/test_case/llm_test_case.py +152 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +15 -14
- deepeval/test_run/cache.py +2 -0
- deepeval/test_run/test_run.py +9 -4
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +89 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -334
- deepeval/models/mlllms/gemini_model.py +0 -284
- deepeval/models/mlllms/ollama_model.py +0 -144
- deepeval/models/mlllms/openai_model.py +0 -258
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/portkey_model.py
ADDED
@@ -0,0 +1,149 @@
+import aiohttp
+import requests
+from typing import Any, Dict, List, Optional, Union
+from pydantic import AnyUrl, SecretStr
+
+from deepeval.config.settings import get_settings
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
+from deepeval.models import DeepEvalBaseLLM
+from deepeval.utils import require_param
+
+
+def _request_timeout_seconds() -> float:
+    timeout = float(get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
+    return timeout if timeout > 0 else 30.0
+
+
+class PortkeyModel(DeepEvalBaseLLM):
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[AnyUrl] = None,
+        provider: Optional[str] = None,
+        generation_kwargs: Optional[Dict] = None,
+        **kwargs,
+    ):
+        settings = get_settings()
+        model = model or settings.PORTKEY_MODEL_NAME
+
+        self.name = require_param(
+            model,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_MODEL_NAME",
+            param_hint="model",
+        )
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serialization, logging, and the like
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.PORTKEY_API_KEY
+
+        if base_url is not None:
+            base_url = str(base_url).rstrip("/")
+        elif settings.PORTKEY_BASE_URL is not None:
+            base_url = str(settings.PORTKEY_BASE_URL).rstrip("/")
+
+        self.base_url = require_param(
+            base_url,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_BASE_URL",
+            param_hint="base_url",
+        )
+
+        provider = provider or settings.PORTKEY_PROVIDER_NAME
+        self.provider = require_param(
+            provider,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_PROVIDER_NAME",
+            param_hint="provider",
+        )
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = kwargs
+        self.generation_kwargs = generation_kwargs or {}
+
+    def _headers(self) -> Dict[str, str]:
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="Portkey",
+            env_var_name="PORTKEY_API_KEY",
+            param_hint="`api_key` to PortkeyModel(...)",
+        )
+
+        headers = {
+            "Content-Type": "application/json",
+            "x-portkey-api-key": api_key,
+        }
+        if self.provider:
+            headers["x-portkey-provider"] = self.provider
+        return headers
+
+    def _payload(self, prompt: str) -> Dict[str, Any]:
+        payload = {
+            "model": self.name,
+            "messages": [{"role": "user", "content": prompt}],
+        }
+        if self.generation_kwargs:
+            payload.update(self.generation_kwargs)
+        return payload
+
+    def _extract_content(self, data: Dict[str, Any]) -> str:
+        choices: Union[List[Dict[str, Any]], None] = data.get("choices")
+        if not choices:
+            raise ValueError("Portkey response did not include any choices.")
+        message = choices[0].get("message", {})
+        content: Union[str, List[Dict[str, Any]], None] = message.get("content")
+        if isinstance(content, str):
+            return content
+        if isinstance(content, list):
+            return "".join(part.get("text", "") for part in content)
+        return ""
+
+    def generate(self, prompt: str) -> str:
+
+        try:
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                json=self._payload(prompt),
+                headers=self._headers(),
+                timeout=60,
+            )
+            response.raise_for_status()
+        except requests.HTTPError as error:
+            body: Union[str, Dict[str, Any]]
+            try:
+                body = response.json()
+            except Exception:
+                body = response.text
+            raise ValueError(
+                f"Portkey request failed with status {response.status_code}: {body}"
+            ) from error
+        except requests.RequestException as error:
+            raise ValueError(f"Portkey request failed: {error}") from error
+        return self._extract_content(response.json())
+
+    async def a_generate(self, prompt: str) -> str:
+
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{self.base_url}/chat/completions",
+                json=self._payload(prompt),
+                headers=self._headers(),
+                timeout=60,
+            ) as response:
+                if response.status >= 400:
+                    body = await response.text()
+                    raise ValueError(
+                        f"Portkey request failed with status {response.status}: {body}"
+                    )
+                data = await response.json()
+                return self._extract_content(data)
+
+    def load_model(self):
+        return None
+
+    def get_model_name(self):
+        return f"{self.name} (Portkey)"
deepeval/models/llms/utils.py
CHANGED
@@ -1,8 +1,10 @@
-from typing import Dict
+from typing import Dict, List, Optional
 import re
 import json
 import asyncio
 
+MULTIMODAL_MODELS = ["GPTModel", "AzureModel", "GeminiModel", "OllamaModel"]
+
 
 def trim_and_load_json(
     input_string: str,
@@ -38,7 +40,7 @@ def safe_asyncio_run(coro):
             return loop.run_until_complete(future)
         else:
            return loop.run_until_complete(coro)
-    except Exception
+    except Exception:
        raise
-    except Exception
+    except Exception:
        raise
deepeval/models/retry_policy.py
CHANGED
@@ -55,6 +55,7 @@ from tenacity.stop import stop_base
 from tenacity.wait import wait_base
 from contextvars import ContextVar, copy_context
 
+from deepeval.utils import require_dependency
 from deepeval.constants import (
     ProviderSlug as PS,
     slugify,
@@ -829,25 +830,23 @@
 except Exception:  # botocore not present (aiobotocore optional)
     BEDROCK_ERROR_POLICY = None
 
-
 ####################
 # Anthropic Policy #
 ####################
 
 try:
-    from anthropic import (
-        AuthenticationError,
-        RateLimitError,
-        APIConnectionError,
-        APITimeoutError,
-        APIStatusError,
+
+    module = require_dependency(
+        "anthropic",
+        provider_label="retry_policy",
+        install_hint="Install it with `pip install anthropic`.",
     )
 
     ANTHROPIC_ERROR_POLICY = ErrorPolicy(
-        auth_excs=(AuthenticationError,),
-        rate_limit_excs=(RateLimitError,),
-        network_excs=(APIConnectionError, APITimeoutError),
-        http_excs=(APIStatusError,),
+        auth_excs=(module.AuthenticationError,),
+        rate_limit_excs=(module.RateLimitError,),
+        network_excs=(module.APIConnectionError, module.APITimeoutError),
+        http_excs=(module.APIStatusError,),
         non_retryable_codes=frozenset(),  # update if we learn of hard quota codes
         message_markers={},
     )
@@ -868,7 +867,11 @@ except Exception:  # Anthropic optional
 # and gate retries using message markers (code sniffing).
 # See: https://github.com/googleapis/python-genai?tab=readme-ov-file#error-handling
 try:
-    from google.genai import errors as gerrors
+    module = require_dependency(
+        "google.genai",
+        provider_label="retry_policy",
+        install_hint="Install it with `pip install google-genai`.",
+    )
 
     _HTTPX_NET_EXCS = _httpx_net_excs()
     _REQUESTS_EXCS = _requests_net_excs()
@@ -887,9 +890,9 @@
     GOOGLE_ERROR_POLICY = ErrorPolicy(
         auth_excs=(),  # we will classify 401/403 via markers below (see non-retryable codes)
         rate_limit_excs=(
-            gerrors.ClientError,
+            module.gerrors.ClientError,
         ),  # includes 429; markers decide retry vs not
-        network_excs=(gerrors.ServerError,)
+        network_excs=(module.gerrors.ServerError,)
         + _HTTPX_NET_EXCS
         + _REQUESTS_EXCS,  # treat 5xx as transient
         http_excs=(),  # no reliable .status_code on exceptions; handled above
deepeval/models/utils.py
CHANGED
@@ -1,4 +1,11 @@
-from typing import Optional
+import logging
+from typing import Any, Dict, Optional, Tuple
+from pydantic import SecretStr
+
+from deepeval.errors import DeepEvalError
+
+
+logger = logging.getLogger(__name__)
 
 
 def parse_model_name(model_name: Optional[str] = None) -> str:
@@ -25,7 +32,90 @@ def parse_model_name(model_name: Optional[str] = None) -> str:
     if model_name is None:
         return None
 
-
-
-
+    if "/" in model_name:
+        _, parsed_model_name = model_name.split("/", 1)
+        return parsed_model_name
     return model_name
+
+
+def require_secret_api_key(
+    secret: Optional[SecretStr],
+    *,
+    provider_label: str,
+    env_var_name: str,
+    param_hint: str,
+) -> str:
+    """
+    Normalize and validate a provider API key stored as a SecretStr.
+
+    Args:
+        secret:
+            The SecretStr coming from Settings or an explicit constructor arg.
+        provider_label:
+            Human-readable provider name for error messages, such as Anthropic or OpenAI.
+        env_var_name:
+            The environment variable backing this key.
+        param_hint:
+            A short hint telling users how to pass the key explicitly.
+
+    Returns:
+        The underlying API key string.
+
+    Raises:
+        DeepEvalError: if the key is missing or empty.
+    """
+    if secret is None:
+        raise DeepEvalError(
+            f"{provider_label} API key is not configured. "
+            f"Set {env_var_name} in your environment or pass "
+            f"{param_hint}."
+        )
+
+    api_key = secret.get_secret_value()
+    if not api_key:
+        raise DeepEvalError(
+            f"{provider_label} API key is empty. Please configure a valid key."
+        )
+
+    return api_key
+
+
+def normalize_kwargs_and_extract_aliases(
+    provider_label: str,
+    kwargs: Dict[str, Any],
+    alias_map: Dict[str, list],
+) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    """
+    Normalize legacy keyword argument names according to alias_map.
+
+    alias_map is of the form: {new_name: [old_name1, old_name2, ...]}
+
+    - Returns (normalized_kwargs, extracted_values)
+      where:
+      - normalized_kwargs has all legacy keys removed (to prevent forwarding
+        to downstream SDK clients).
+      - extracted_values maps new_name -> value for any alias that was used.
+
+    - Logs a warning for each legacy keyword used, so callers know they should
+      migrate to the new name.
+    """
+    normalized = dict(kwargs)
+    extracted: Dict[str, Any] = {}
+
+    for new_name, old_names in alias_map.items():
+        for old_name in old_names:
+            if old_name in normalized:
+                value = normalized.pop(old_name)
+
+                logger.warning(
+                    "%s keyword '%s' is deprecated; please use '%s' instead.",
+                    provider_label,
+                    old_name,
+                    new_name,
+                )
+
+                # Only preserve the first alias value we see for a given new_name
+                if new_name not in extracted:
+                    extracted[new_name] = value
+
+    return normalized, extracted
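For example, a provider wrapper that renamed a legacy `model_name` keyword to `model` could strip and recover it as follows; the alias names here are illustrative, not taken from a particular wrapper:

from deepeval.models.utils import normalize_kwargs_and_extract_aliases

kwargs = {"model_name": "gpt-4o-mini", "timeout": 30}
normalized, extracted = normalize_kwargs_and_extract_aliases(
    "OpenAI",                   # provider_label used in the deprecation warning
    kwargs,
    {"model": ["model_name"]},  # new_name -> list of legacy aliases
)
# normalized == {"timeout": 30}          (legacy key stripped before reaching the SDK)
# extracted  == {"model": "gpt-4o-mini"} (recovered under the new name)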
deepeval/optimizer/algorithms/base.py
ADDED
@@ -0,0 +1,29 @@
+from abc import ABC, abstractmethod
+from typing import Union, List, Dict, Tuple
+
+from deepeval.models.base_model import DeepEvalBaseLLM
+from deepeval.optimizer.scorer.base import BaseScorer
+from deepeval.prompt.prompt import Prompt
+from deepeval.dataset.golden import Golden, ConversationalGolden
+
+
+class BaseAlgorithm(ABC):
+    name: str
+    optimizer_model: DeepEvalBaseLLM
+    scorer: BaseScorer
+
+    @abstractmethod
+    def execute(
+        self,
+        prompt: Prompt,
+        goldens: Union[List[Golden], List[ConversationalGolden]],
+    ) -> Tuple[Prompt, Dict]:
+        raise NotImplementedError
+
+    @abstractmethod
+    async def a_execute(
+        self,
+        prompt: Prompt,
+        goldens: Union[List[Golden], List[ConversationalGolden]],
+    ) -> Tuple[Prompt, Dict]:
+        raise NotImplementedError
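Every optimizer algorithm in the new deepeval.optimizer package (COPRO, GEPA, MIPROv2, SIMBA) implements this interface. A minimal conforming subclass, sketched under the assumption that execute returns the best prompt plus a metadata dict; the no-op logic is illustrative only:

from typing import Dict, List, Tuple, Union

from deepeval.dataset.golden import ConversationalGolden, Golden
from deepeval.optimizer.algorithms.base import BaseAlgorithm
from deepeval.prompt.prompt import Prompt


class IdentityAlgorithm(BaseAlgorithm):
    name = "identity"

    def execute(
        self,
        prompt: Prompt,
        goldens: Union[List[Golden], List[ConversationalGolden]],
    ) -> Tuple[Prompt, Dict]:
        # A real algorithm would propose, score, and keep candidate prompts here.
        return prompt, {"iterations": 0}

    async def a_execute(
        self,
        prompt: Prompt,
        goldens: Union[List[Golden], List[ConversationalGolden]],
    ) -> Tuple[Prompt, Dict]:
        return self.execute(prompt, goldens)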
deepeval/optimizer/algorithms/configs.py
ADDED
@@ -0,0 +1,18 @@
+# Internal GEPA constants - not exposed to users
+GEPA_MIN_DELTA: float = 0.0
+GEPA_TIE_TOLERANCE: float = 1e-9
+GEPA_REWRITE_INSTRUCTION_MAX_CHARS: int = 4096
+
+# Internal MIPROV2 constants - not exposed to users
+MIPROV2_MIN_DELTA: float = 0.0
+MIPROV2_REWRITE_INSTRUCTION_MAX_CHARS: int = 4096
+MIPROV2_DEFAULT_NUM_CANDIDATES: int = 10
+MIPROV2_DEFAULT_NUM_TRIALS: int = 20
+MIPROV2_DEFAULT_MINIBATCH_SIZE: int = 25
+MIPROV2_DEFAULT_MINIBATCH_FULL_EVAL_STEPS: int = 10
+MIPROV2_DEFAULT_MAX_BOOTSTRAPPED_DEMOS: int = 4
+MIPROV2_DEFAULT_MAX_LABELED_DEMOS: int = 4
+MIPROV2_DEFAULT_NUM_DEMO_SETS: int = 5
+
+# Internal SIMBA constants - not exposed to users
+SIMBA_DEMO_INPUT_MAX_CHARS: int = 256
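A sketch of how an optimization loop might consume the MIPROv2 defaults above; the loop itself is illustrative, while the packaged implementation lives in deepeval/optimizer/algorithms/miprov2/miprov2.py:

from deepeval.optimizer.algorithms.configs import (
    MIPROV2_DEFAULT_MINIBATCH_FULL_EVAL_STEPS,
    MIPROV2_DEFAULT_MINIBATCH_SIZE,
    MIPROV2_DEFAULT_NUM_TRIALS,
)

goldens = list(range(100))  # stand-in for a list of Golden objects

for trial in range(MIPROV2_DEFAULT_NUM_TRIALS):
    # Score each candidate on a small slice to keep trials cheap.
    minibatch = goldens[:MIPROV2_DEFAULT_MINIBATCH_SIZE]
    # Periodically re-score the incumbent on the full set.
    if trial % MIPROV2_DEFAULT_MINIBATCH_FULL_EVAL_STEPS == 0:
        full_eval_batch = goldens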
|