deepeval-3.7.4-py3-none-any.whl → deepeval-3.7.6-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +35 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +139 -2
- deepeval/evaluate/evaluate.py +16 -11
- deepeval/evaluate/execute.py +13 -181
- deepeval/evaluate/utils.py +6 -26
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/key_handler.py +3 -0
- deepeval/metrics/__init__.py +14 -16
- deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
- deepeval/metrics/answer_relevancy/template.py +22 -3
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +13 -44
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +16 -2
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +138 -149
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/g_eval.py +87 -78
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +7 -10
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +68 -38
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
- deepeval/metrics/mcp/template.py +52 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -19
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
- deepeval/metrics/tool_use/tool_use.py +42 -66
- deepeval/metrics/topic_adherence/template.py +13 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +158 -122
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +49 -33
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +79 -33
- deepeval/models/embedding_models/local_embedding_model.py +39 -20
- deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
- deepeval/models/embedding_models/openai_embedding_model.py +42 -22
- deepeval/models/llms/amazon_bedrock_model.py +226 -72
- deepeval/models/llms/anthropic_model.py +178 -63
- deepeval/models/llms/azure_model.py +218 -60
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +95 -40
- deepeval/models/llms/gemini_model.py +209 -64
- deepeval/models/llms/grok_model.py +139 -68
- deepeval/models/llms/kimi_model.py +140 -90
- deepeval/models/llms/litellm_model.py +131 -37
- deepeval/models/llms/local_model.py +125 -21
- deepeval/models/llms/ollama_model.py +147 -24
- deepeval/models/llms/openai_model.py +222 -269
- deepeval/models/llms/portkey_model.py +81 -22
- deepeval/models/llms/utils.py +8 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +106 -5
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
- deepeval/{optimization → optimizer}/configs.py +5 -8
- deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/{optimization → optimizer}/utils.py +47 -165
- deepeval/prompt/prompt.py +5 -9
- deepeval/simulator/conversation_simulator.py +43 -0
- deepeval/simulator/template.py +13 -0
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +26 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +68 -1
- deepeval/test_case/llm_test_case.py +206 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +18 -14
- deepeval/test_run/test_run.py +3 -3
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +65 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -343
- deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval/models/mlllms/ollama_model.py +0 -175
- deepeval/models/mlllms/openai_model.py +0 -309
- deepeval/optimization/__init__.py +0 -13
- deepeval/optimization/adapters/__init__.py +0 -2
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval/optimization/aggregates.py +0 -14
- deepeval/optimization/copro/configs.py +0 -31
- deepeval/optimization/gepa/__init__.py +0 -7
- deepeval/optimization/gepa/configs.py +0 -115
- deepeval/optimization/miprov2/configs.py +0 -134
- deepeval/optimization/miprov2/loop.py +0 -785
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval/optimization/policies/__init__.py +0 -16
- deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +0 -33
- deepeval/optimization/types.py +0 -361
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
deepeval/models/llms/azure_model.py

@@ -1,71 +1,159 @@
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
-from typing import Optional, Tuple, Union, Dict
+from typing import Optional, Tuple, Union, Dict, List
 from pydantic import BaseModel, SecretStr

+from deepeval.errors import DeepEvalError
 from deepeval.config.settings import get_settings
 from deepeval.models import DeepEvalBaseLLM
-from deepeval.models.llms.
-    structured_outputs_models,
-    json_mode_models,
-    model_pricing,
-)
+from deepeval.models.llms.constants import OPENAI_MODELS_DATA
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
-
-from deepeval.
-
+from deepeval.test_case import MLLMImage
+from deepeval.utils import (
+    convert_to_multi_modal_array,
+    check_if_multimodal,
+    require_param,
+)
+from deepeval.models.llms.utils import (
+    trim_and_load_json,
+)
+from deepeval.models.utils import (
+    parse_model_name,
+    require_secret_api_key,
+    require_costs,
+    normalize_kwargs_and_extract_aliases,
+)
 from deepeval.constants import ProviderSlug as PS

-
 retry_azure = create_retry_decorator(PS.AZURE)

+_ALIAS_MAP = {
+    "api_key": ["azure_openai_api_key"],
+    "base_url": ["azure_endpoint"],
+}
+

 class AzureOpenAIModel(DeepEvalBaseLLM):
     def __init__(
         self,
+        model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        temperature: Optional[float] = None,
+        cost_per_input_token: Optional[float] = None,
+        cost_per_output_token: Optional[float] = None,
         deployment_name: Optional[str] = None,
-
-        azure_openai_api_key: Optional[str] = None,
-        openai_api_version: Optional[str] = None,
-        azure_endpoint: Optional[str] = None,
-        temperature: float = 0,
+        api_version: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
         settings = get_settings()
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "AzureOpenAIModel",
+            kwargs,
+            _ALIAS_MAP,
+        )
+
+        # re-map deprecated keywords to re-named positional args
+        if api_key is None and "api_key" in alias_values:
+            api_key = alias_values["api_key"]
+        if base_url is None and "base_url" in alias_values:
+            base_url = alias_values["base_url"]

         # fetch Azure deployment parameters
-
-
+        model = model or settings.AZURE_MODEL_NAME
+        deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME

-        if
+        if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
-            self.
-                azure_openai_api_key
-            )
+            self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
-            self.
+            self.api_key = settings.AZURE_OPENAI_API_KEY

-
-
+        api_version = api_version or settings.OPENAI_API_VERSION
+        if base_url is not None:
+            base_url = str(base_url).rstrip("/")
+        elif settings.AZURE_OPENAI_ENDPOINT is not None:
+            base_url = str(settings.AZURE_OPENAI_ENDPOINT).rstrip("/")
+
+        if temperature is not None:
+            temperature = float(temperature)
+        elif settings.TEMPERATURE is not None:
+            temperature = settings.TEMPERATURE
+        else:
+            temperature = 0.0
+
+        cost_per_input_token = (
+            cost_per_input_token
+            if cost_per_input_token is not None
+            else settings.OPENAI_COST_PER_INPUT_TOKEN
+        )
+        cost_per_output_token = (
+            cost_per_output_token
+            if cost_per_output_token is not None
+            else settings.OPENAI_COST_PER_OUTPUT_TOKEN
         )
-
-
-
-
+
+        # validation
+        model = require_param(
+            model,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_MODEL_NAME",
+            param_hint="model",
         )

+        self.deployment_name = require_param(
+            deployment_name,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_DEPLOYMENT_NAME",
+            param_hint="deployment_name",
+        )
+
+        self.base_url = require_param(
+            base_url,
+            provider_label="AzureOpenAIModel",
+            env_var_name="AZURE_OPENAI_ENDPOINT",
+            param_hint="base_url",
+        )
+
+        self.api_version = require_param(
+            api_version,
+            provider_label="AzureOpenAIModel",
+            env_var_name="OPENAI_API_VERSION",
+            param_hint="api_version",
+        )
+
+        self.model_data = OPENAI_MODELS_DATA.get(model)
+        cost_per_input_token, cost_per_output_token = require_costs(
+            self.model_data,
+            model,
+            "OPENAI_COST_PER_INPUT_TOKEN",
+            "OPENAI_COST_PER_OUTPUT_TOKEN",
+            cost_per_input_token,
+            cost_per_output_token,
+        )
+        self.model_data.input_price = cost_per_input_token
+        self.model_data.output_price = cost_per_output_token
+
         if temperature < 0:
-            raise
+            raise DeepEvalError("Temperature must be >= 0.")
         self.temperature = temperature

-        #
-        self.kwargs =
-        self.
-
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = normalized_kwargs
+        self.kwargs.pop(
+            "temperature", None
+        )  # to avoid duplicate with self.temperature
+
+        self.generation_kwargs = dict(generation_kwargs or {})
+        self.generation_kwargs.pop(
+            "temperature", None
+        )  # to avoid duplicate with self.temperature
+
+        super().__init__(parse_model_name(model))

     ###############################################
     # Other generate functions
@@ -74,17 +162,23 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     @retry_azure
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str,
+    ) -> Tuple[Union[str, BaseModel], float]:
         client = self.load_model(async_mode=False)
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
+
         if schema:
-            if self.
+            if self.model_data.supports_structured_outputs:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[
-                        {"role": "user", "content": prompt},
-                    ],
+                    messages=[{"role": "user", "content": content}],
                     response_format=schema,
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 structured_output: BaseModel = completion.choices[
                     0
@@ -94,14 +188,15 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.
+            if self.model_data.supports_json:
                 completion = client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
-                        {"role": "user", "content":
+                        {"role": "user", "content": content},
                     ],
                     response_format={"type": "json_object"},
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 json_output = trim_and_load_json(
                     completion.choices[0].message.content
@@ -115,7 +210,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         completion = client.chat.completions.create(
             model=self.deployment_name,
             messages=[
-                {"role": "user", "content":
+                {"role": "user", "content": content},
             ],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -135,15 +230,21 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         self, prompt: str, schema: Optional[BaseModel] = None
     ) -> Tuple[Union[str, BaseModel], float]:
         client = self.load_model(async_mode=True)
+
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
+
         if schema:
-            if self.
+            if self.model_data.supports_structured_outputs:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
-                    messages=[
-                        {"role": "user", "content": prompt},
-                    ],
+                    messages=[{"role": "user", "content": content}],
                     response_format=schema,
                     temperature=self.temperature,
+                    **self.generation_kwargs,
                 )
                 structured_output: BaseModel = completion.choices[
                     0
@@ -153,11 +254,11 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
                     completion.usage.completion_tokens,
                 )
                 return structured_output, cost
-            if self.
+            if self.model_data.supports_json:
                 completion = await client.beta.chat.completions.parse(
                     model=self.deployment_name,
                     messages=[
-                        {"role": "user", "content":
+                        {"role": "user", "content": content},
                     ],
                     response_format={"type": "json_object"},
                     temperature=self.temperature,
@@ -175,7 +276,7 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         completion = await client.chat.completions.create(
             model=self.deployment_name,
             messages=[
-                {"role": "user", "content":
+                {"role": "user", "content": content},
             ],
             temperature=self.temperature,
             **self.generation_kwargs,
@@ -203,9 +304,14 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     ) -> Tuple[ChatCompletion, float]:
         # Generate completion
         client = self.load_model(async_mode=False)
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
         completion = client.chat.completions.create(
             model=self.deployment_name,
-            messages=[{"role": "user", "content":
+            messages=[{"role": "user", "content": content}],
             temperature=self.temperature,
             logprobs=True,
             top_logprobs=top_logprobs,
@@ -226,9 +332,14 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
     ) -> Tuple[ChatCompletion, float]:
         # Generate completion
         client = self.load_model(async_mode=True)
+        if check_if_multimodal(prompt):
+            prompt = convert_to_multi_modal_array(input=prompt)
+            content = self.generate_content(prompt)
+        else:
+            content = [{"type": "text", "text": prompt}]
         completion = await client.chat.completions.create(
             model=self.deployment_name,
-            messages=[{"role": "user", "content":
+            messages=[{"role": "user", "content": content}],
             temperature=self.temperature,
             logprobs=True,
             top_logprobs=top_logprobs,
@@ -241,22 +352,66 @@ class AzureOpenAIModel(DeepEvalBaseLLM):

         return completion, cost

+    def generate_content(
+        self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
+    ):
+        multimodal_input = [] if multimodal_input is None else multimodal_input
+        content = []
+        for element in multimodal_input:
+            if isinstance(element, str):
+                content.append({"type": "text", "text": element})
+            elif isinstance(element, MLLMImage):
+                if element.url and not element.local:
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": element.url},
+                        }
+                    )
+                else:
+                    element.ensure_images_loaded()
+                    data_uri = (
+                        f"data:{element.mimeType};base64,{element.dataBase64}"
+                    )
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": data_uri},
+                        }
+                    )
+        return content
+
     ###############################################
     # Utilities
     ###############################################

     def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
-
-
-        output_cost = output_tokens * pricing["output"]
+        input_cost = input_tokens * self.model_data.input_price
+        output_cost = output_tokens * self.model_data.output_price
         return input_cost + output_cost

     ###############################################
-    #
+    # Capabilities
     ###############################################

-    def
-        return
+    def supports_log_probs(self) -> Union[bool, None]:
+        return self.model_data.supports_log_probs
+
+    def supports_temperature(self) -> Union[bool, None]:
+        return self.model_data.supports_temperature
+
+    def supports_multimodal(self) -> Union[bool, None]:
+        return self.model_data.supports_multimodal
+
+    def supports_structured_outputs(self) -> Union[bool, None]:
+        return self.model_data.supports_structured_outputs
+
+    def supports_json_mode(self) -> Union[bool, None]:
+        return self.model_data.supports_json
+
+    ###############################################
+    # Model
+    ###############################################

     def load_model(self, async_mode: bool = False):
         if not async_mode:
@@ -276,16 +431,16 @@ class AzureOpenAIModel(DeepEvalBaseLLM):

     def _build_client(self, cls):
         api_key = require_secret_api_key(
-            self.
+            self.api_key,
             provider_label="AzureOpenAI",
             env_var_name="AZURE_OPENAI_API_KEY",
-            param_hint="`
+            param_hint="`api_key` to AzureOpenAIModel(...)",
         )

         kw = dict(
             api_key=api_key,
-            api_version=self.
-            azure_endpoint=self.
+            api_version=self.api_version,
+            azure_endpoint=self.base_url,
             azure_deployment=self.deployment_name,
             **self._client_kwargs(),
         )
@@ -297,3 +452,6 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         kw.pop("max_retries", None)
         return cls(**kw)
         raise
+
+    def get_model_name(self):
+        return f"{self.name} (Azure)"
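Taken together, the constructor now accepts model, api_key, base_url, and api_version (the 3.7.4 keywords azure_openai_api_key, azure_endpoint, and openai_api_version are still honored through _ALIAS_MAP), looks up per-model capabilities and token prices in OPENAI_MODELS_DATA, and routes multimodal prompts through the new generate_content() helper. The sketch below shows how a caller might construct the model under the new signature; it is based only on the parameter names, return types, and env-var fallbacks visible in this diff, and the model, deployment, endpoint, and version values are placeholders rather than values taken from the package.

# Minimal usage sketch for the 3.7.6 AzureOpenAIModel signature shown above.
# All concrete values are placeholders; the env fallbacks in the diff are
# AZURE_MODEL_NAME, AZURE_DEPLOYMENT_NAME, AZURE_OPENAI_API_KEY,
# AZURE_OPENAI_ENDPOINT, and OPENAI_API_VERSION.
from deepeval.models.llms.azure_model import AzureOpenAIModel

azure_llm = AzureOpenAIModel(
    model="gpt-4o",                                   # looked up in OPENAI_MODELS_DATA
    deployment_name="my-gpt-4o-deployment",           # Azure deployment to call
    api_key="<azure-openai-api-key>",                 # 3.7.4: azure_openai_api_key=...
    base_url="https://my-resource.openai.azure.com",  # 3.7.4: azure_endpoint=...
    api_version="2024-06-01",                         # 3.7.4: openai_api_version=...
    temperature=0,
)

# generate() returns an (output, cost) tuple; the cost comes from the per-token
# prices that __init__ attaches to self.model_data via require_costs().
output, cost = azure_llm.generate("Summarize the retry policy used for Azure calls.")
print(output, cost)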