deepeval 3.7.4__py3-none-any.whl → 3.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +35 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +139 -2
- deepeval/evaluate/evaluate.py +16 -11
- deepeval/evaluate/execute.py +13 -181
- deepeval/evaluate/utils.py +6 -26
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/key_handler.py +3 -0
- deepeval/metrics/__init__.py +14 -16
- deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
- deepeval/metrics/answer_relevancy/template.py +22 -3
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +13 -44
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +16 -2
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +138 -149
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/g_eval.py +87 -78
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +7 -10
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +68 -38
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
- deepeval/metrics/mcp/template.py +52 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -19
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
- deepeval/metrics/tool_use/tool_use.py +42 -66
- deepeval/metrics/topic_adherence/template.py +13 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +158 -122
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +49 -33
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +79 -33
- deepeval/models/embedding_models/local_embedding_model.py +39 -20
- deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
- deepeval/models/embedding_models/openai_embedding_model.py +42 -22
- deepeval/models/llms/amazon_bedrock_model.py +226 -72
- deepeval/models/llms/anthropic_model.py +178 -63
- deepeval/models/llms/azure_model.py +218 -60
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +95 -40
- deepeval/models/llms/gemini_model.py +209 -64
- deepeval/models/llms/grok_model.py +139 -68
- deepeval/models/llms/kimi_model.py +140 -90
- deepeval/models/llms/litellm_model.py +131 -37
- deepeval/models/llms/local_model.py +125 -21
- deepeval/models/llms/ollama_model.py +147 -24
- deepeval/models/llms/openai_model.py +222 -269
- deepeval/models/llms/portkey_model.py +81 -22
- deepeval/models/llms/utils.py +8 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +106 -5
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
- deepeval/{optimization → optimizer}/configs.py +5 -8
- deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/{optimization → optimizer}/utils.py +47 -165
- deepeval/prompt/prompt.py +5 -9
- deepeval/simulator/conversation_simulator.py +43 -0
- deepeval/simulator/template.py +13 -0
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +26 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +68 -1
- deepeval/test_case/llm_test_case.py +206 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +18 -14
- deepeval/test_run/test_run.py +3 -3
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +65 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -343
- deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval/models/mlllms/ollama_model.py +0 -175
- deepeval/models/mlllms/openai_model.py +0 -309
- deepeval/optimization/__init__.py +0 -13
- deepeval/optimization/adapters/__init__.py +0 -2
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval/optimization/aggregates.py +0 -14
- deepeval/optimization/copro/configs.py +0 -31
- deepeval/optimization/gepa/__init__.py +0 -7
- deepeval/optimization/gepa/configs.py +0 -115
- deepeval/optimization/miprov2/configs.py +0 -134
- deepeval/optimization/miprov2/loop.py +0 -785
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval/optimization/policies/__init__.py +0 -16
- deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +0 -33
- deepeval/optimization/types.py +0 -361
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
|
@@ -1,234 +1,77 @@
|
|
|
1
1
|
from openai.types.chat.chat_completion import ChatCompletion
|
|
2
|
-
from typing import Optional, Tuple, Union, Dict
|
|
2
|
+
from typing import Optional, Tuple, Union, Dict, List
|
|
3
|
+
from deepeval.test_case import MLLMImage
|
|
3
4
|
from pydantic import BaseModel, SecretStr
|
|
4
|
-
|
|
5
5
|
from openai import (
|
|
6
6
|
OpenAI,
|
|
7
7
|
AsyncOpenAI,
|
|
8
8
|
)
|
|
9
9
|
|
|
10
|
+
from deepeval.errors import DeepEvalError
|
|
11
|
+
from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
|
|
10
12
|
from deepeval.config.settings import get_settings
|
|
11
13
|
from deepeval.constants import ProviderSlug as PS
|
|
12
14
|
from deepeval.models import DeepEvalBaseLLM
|
|
13
15
|
from deepeval.models.llms.utils import trim_and_load_json
|
|
14
|
-
from deepeval.models.utils import
|
|
16
|
+
from deepeval.models.utils import (
|
|
17
|
+
parse_model_name,
|
|
18
|
+
require_costs,
|
|
19
|
+
require_secret_api_key,
|
|
20
|
+
normalize_kwargs_and_extract_aliases,
|
|
21
|
+
)
|
|
15
22
|
from deepeval.models.retry_policy import (
|
|
16
23
|
create_retry_decorator,
|
|
17
24
|
sdk_retries_for,
|
|
18
25
|
)
|
|
26
|
+
from deepeval.models.llms.constants import (
|
|
27
|
+
OPENAI_MODELS_DATA,
|
|
28
|
+
)
|
|
19
29
|
|
|
20
30
|
|
|
21
31
|
retry_openai = create_retry_decorator(PS.OPENAI)
|
|
22
32
|
|
|
23
|
-
valid_gpt_models = [
|
|
24
|
-
"gpt-3.5-turbo",
|
|
25
|
-
"gpt-3.5-turbo-0125",
|
|
26
|
-
"gpt-3.5-turbo-1106",
|
|
27
|
-
"gpt-4-0125-preview",
|
|
28
|
-
"gpt-4-1106-preview",
|
|
29
|
-
"gpt-4-turbo",
|
|
30
|
-
"gpt-4-turbo-2024-04-09",
|
|
31
|
-
"gpt-4-turbo-preview",
|
|
32
|
-
"gpt-4o",
|
|
33
|
-
"gpt-4o-2024-05-13",
|
|
34
|
-
"gpt-4o-2024-08-06",
|
|
35
|
-
"gpt-4o-2024-11-20",
|
|
36
|
-
"gpt-4o-mini",
|
|
37
|
-
"gpt-4o-mini-2024-07-18",
|
|
38
|
-
"gpt-4-32k",
|
|
39
|
-
"gpt-4-32k-0613",
|
|
40
|
-
"gpt-4.1",
|
|
41
|
-
"gpt-4.1-mini",
|
|
42
|
-
"gpt-4.1-nano",
|
|
43
|
-
"gpt-4.5-preview",
|
|
44
|
-
"o1",
|
|
45
|
-
"o1-preview",
|
|
46
|
-
"o1-2024-12-17",
|
|
47
|
-
"o1-preview-2024-09-12",
|
|
48
|
-
"o1-mini",
|
|
49
|
-
"o1-mini-2024-09-12",
|
|
50
|
-
"o3-mini",
|
|
51
|
-
"o3-mini-2025-01-31",
|
|
52
|
-
"o4-mini",
|
|
53
|
-
"o4-mini-2025-04-16",
|
|
54
|
-
"gpt-4.5-preview-2025-02-27",
|
|
55
|
-
"gpt-5",
|
|
56
|
-
"gpt-5-2025-08-07",
|
|
57
|
-
"gpt-5-mini",
|
|
58
|
-
"gpt-5-mini-2025-08-07",
|
|
59
|
-
"gpt-5-nano",
|
|
60
|
-
"gpt-5-nano-2025-08-07",
|
|
61
|
-
"gpt-5-chat-latest",
|
|
62
|
-
]
|
|
63
|
-
|
|
64
|
-
unsupported_log_probs_gpt_models = [
|
|
65
|
-
"o1",
|
|
66
|
-
"o1-preview",
|
|
67
|
-
"o1-2024-12-17",
|
|
68
|
-
"o1-preview-2024-09-12",
|
|
69
|
-
"o1-mini",
|
|
70
|
-
"o1-mini-2024-09-12",
|
|
71
|
-
"o3-mini",
|
|
72
|
-
"o3-mini-2025-01-31",
|
|
73
|
-
"o4-mini",
|
|
74
|
-
"o4-mini-2025-04-16",
|
|
75
|
-
"gpt-4.5-preview-2025-02-27",
|
|
76
|
-
"gpt-5",
|
|
77
|
-
"gpt-5-2025-08-07",
|
|
78
|
-
"gpt-5-mini",
|
|
79
|
-
"gpt-5-mini-2025-08-07",
|
|
80
|
-
"gpt-5-nano",
|
|
81
|
-
"gpt-5-nano-2025-08-07",
|
|
82
|
-
"gpt-5-chat-latest",
|
|
83
|
-
]
|
|
84
|
-
|
|
85
|
-
structured_outputs_models = [
|
|
86
|
-
"gpt-4o",
|
|
87
|
-
"gpt-4o-2024-05-13",
|
|
88
|
-
"gpt-4o-2024-08-06",
|
|
89
|
-
"gpt-4o-2024-11-20",
|
|
90
|
-
"gpt-4o-mini",
|
|
91
|
-
"gpt-4o-mini-2024-07-18",
|
|
92
|
-
"gpt-4.1",
|
|
93
|
-
"gpt-4.1-mini",
|
|
94
|
-
"gpt-4.1-nano",
|
|
95
|
-
"o1",
|
|
96
|
-
"o1-preview",
|
|
97
|
-
"o1-2024-12-17",
|
|
98
|
-
"o3-mini",
|
|
99
|
-
"o3-mini-2025-01-31",
|
|
100
|
-
"o4-mini",
|
|
101
|
-
"o4-mini-2025-04-16",
|
|
102
|
-
"gpt-4.5-preview-2025-02-27",
|
|
103
|
-
"gpt-5",
|
|
104
|
-
"gpt-5-2025-08-07",
|
|
105
|
-
"gpt-5-mini",
|
|
106
|
-
"gpt-5-mini-2025-08-07",
|
|
107
|
-
"gpt-5-nano",
|
|
108
|
-
"gpt-5-nano-2025-08-07",
|
|
109
|
-
]
|
|
110
|
-
|
|
111
|
-
json_mode_models = [
|
|
112
|
-
"gpt-3.5-turbo",
|
|
113
|
-
"gpt-3.5-turbo-0125",
|
|
114
|
-
"gpt-3.5-turbo-1106",
|
|
115
|
-
"gpt-4-0125-preview",
|
|
116
|
-
"gpt-4-1106-preview",
|
|
117
|
-
"gpt-4-turbo",
|
|
118
|
-
"gpt-4-turbo-2024-04-09",
|
|
119
|
-
"gpt-4-turbo-preview",
|
|
120
|
-
"gpt-4-32k",
|
|
121
|
-
"gpt-4-32k-0613",
|
|
122
|
-
]
|
|
123
|
-
|
|
124
|
-
model_pricing = {
|
|
125
|
-
"gpt-4o-mini": {"input": 0.150 / 1e6, "output": 0.600 / 1e6},
|
|
126
|
-
"gpt-4o": {"input": 2.50 / 1e6, "output": 10.00 / 1e6},
|
|
127
|
-
"gpt-4-turbo": {"input": 10.00 / 1e6, "output": 30.00 / 1e6},
|
|
128
|
-
"gpt-4-turbo-preview": {"input": 10.00 / 1e6, "output": 30.00 / 1e6},
|
|
129
|
-
"gpt-4-0125-preview": {"input": 10.00 / 1e6, "output": 30.00 / 1e6},
|
|
130
|
-
"gpt-4-1106-preview": {"input": 10.00 / 1e6, "output": 30.00 / 1e6},
|
|
131
|
-
"gpt-4": {"input": 30.00 / 1e6, "output": 60.00 / 1e6},
|
|
132
|
-
"gpt-4-32k": {"input": 60.00 / 1e6, "output": 120.00 / 1e6},
|
|
133
|
-
"gpt-3.5-turbo-1106": {"input": 1.00 / 1e6, "output": 2.00 / 1e6},
|
|
134
|
-
"gpt-3.5-turbo": {"input": 0.50 / 1e6, "output": 1.50 / 1e6},
|
|
135
|
-
"gpt-3.5-turbo-16k": {"input": 3.00 / 1e6, "output": 4.00 / 1e6},
|
|
136
|
-
"gpt-3.5-turbo-0125": {"input": 0.50 / 1e6, "output": 1.50 / 1e6},
|
|
137
|
-
"gpt-3.5-turbo-instruct": {"input": 1.50 / 1e6, "output": 2.00 / 1e6},
|
|
138
|
-
"o1": {"input": 15.00 / 1e6, "output": 60.00 / 1e6},
|
|
139
|
-
"o1-preview": {"input": 15.00 / 1e6, "output": 60.00 / 1e6},
|
|
140
|
-
"o1-2024-12-17": {"input": 15.00 / 1e6, "output": 60.00 / 1e6},
|
|
141
|
-
"o3-mini": {"input": 1.10 / 1e6, "output": 4.40 / 1e6},
|
|
142
|
-
"o3-mini-2025-01-31": {"input": 1.10 / 1e6, "output": 4.40 / 1e6},
|
|
143
|
-
"o4-mini": {"input": 1.10 / 1e6, "output": 4.40 / 1e6},
|
|
144
|
-
"o4-mini-2025-04-16": {"input": 1.10 / 1e6, "output": 4.40 / 1e6},
|
|
145
|
-
"gpt-4.1": {
|
|
146
|
-
"input": 2.00 / 1e6,
|
|
147
|
-
"output": 8.00 / 1e6,
|
|
148
|
-
},
|
|
149
|
-
"gpt-4.1-mini": {
|
|
150
|
-
"input": 0.4 / 1e6,
|
|
151
|
-
"output": 1.60 / 1e6,
|
|
152
|
-
},
|
|
153
|
-
"gpt-4.1-nano": {
|
|
154
|
-
"input": 0.1 / 1e6,
|
|
155
|
-
"output": 0.4 / 1e6,
|
|
156
|
-
},
|
|
157
|
-
"gpt-4.5-preview": {
|
|
158
|
-
"input": 75.00 / 1e6,
|
|
159
|
-
"output": 150.00 / 1e6,
|
|
160
|
-
},
|
|
161
|
-
"gpt-5": {
|
|
162
|
-
"input": 1.25 / 1e6,
|
|
163
|
-
"output": 10.00 / 1e6,
|
|
164
|
-
},
|
|
165
|
-
"gpt-5-2025-08-07": {
|
|
166
|
-
"input": 1.25 / 1e6,
|
|
167
|
-
"output": 10.00 / 1e6,
|
|
168
|
-
},
|
|
169
|
-
"gpt-5-mini": {
|
|
170
|
-
"input": 0.25 / 1e6,
|
|
171
|
-
"output": 2.00 / 1e6,
|
|
172
|
-
},
|
|
173
|
-
"gpt-5-mini-2025-08-07": {
|
|
174
|
-
"input": 0.25 / 1e6,
|
|
175
|
-
"output": 2.00 / 1e6,
|
|
176
|
-
},
|
|
177
|
-
"gpt-5-nano": {
|
|
178
|
-
"input": 0.05 / 1e6,
|
|
179
|
-
"output": 0.40 / 1e6,
|
|
180
|
-
},
|
|
181
|
-
"gpt-5-nano-2025-08-07": {
|
|
182
|
-
"input": 0.05 / 1e6,
|
|
183
|
-
"output": 0.40 / 1e6,
|
|
184
|
-
},
|
|
185
|
-
"gpt-5-chat-latest": {
|
|
186
|
-
"input": 1.25 / 1e6,
|
|
187
|
-
"output": 10.00 / 1e6,
|
|
188
|
-
},
|
|
189
|
-
}
|
|
190
|
-
|
|
191
33
|
default_gpt_model = "gpt-4.1"
|
|
192
34
|
|
|
193
|
-
# Thinking models that require temperature=1
|
|
194
|
-
models_requiring_temperature_1 = [
|
|
195
|
-
"o1",
|
|
196
|
-
"o1-2024-12-17",
|
|
197
|
-
"o1-mini",
|
|
198
|
-
"o1-mini-2024-09-12",
|
|
199
|
-
"o3-mini",
|
|
200
|
-
"o3-mini-2025-01-31",
|
|
201
|
-
"o4-mini",
|
|
202
|
-
"o4-mini-2025-04-16",
|
|
203
|
-
"gpt-5",
|
|
204
|
-
"gpt-5-2025-08-07",
|
|
205
|
-
"gpt-5-mini",
|
|
206
|
-
"gpt-5-mini-2025-08-07",
|
|
207
|
-
"gpt-5-nano",
|
|
208
|
-
"gpt-5-nano-2025-08-07",
|
|
209
|
-
]
|
|
210
|
-
|
|
211
35
|
|
|
212
36
|
def _request_timeout_seconds() -> float:
|
|
213
37
|
timeout = float(get_settings().DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS or 0)
|
|
214
38
|
return timeout if timeout > 0 else 30.0
|
|
215
39
|
|
|
216
40
|
|
|
41
|
+
_ALIAS_MAP = {
|
|
42
|
+
"api_key": ["_openai_api_key"],
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
|
|
217
46
|
class GPTModel(DeepEvalBaseLLM):
|
|
47
|
+
|
|
218
48
|
def __init__(
|
|
219
49
|
self,
|
|
220
50
|
model: Optional[str] = None,
|
|
221
|
-
|
|
51
|
+
api_key: Optional[str] = None,
|
|
222
52
|
base_url: Optional[str] = None,
|
|
53
|
+
temperature: Optional[float] = None,
|
|
223
54
|
cost_per_input_token: Optional[float] = None,
|
|
224
55
|
cost_per_output_token: Optional[float] = None,
|
|
225
|
-
temperature: float = 0,
|
|
226
56
|
generation_kwargs: Optional[Dict] = None,
|
|
227
57
|
**kwargs,
|
|
228
58
|
):
|
|
229
59
|
settings = get_settings()
|
|
230
|
-
|
|
60
|
+
|
|
61
|
+
normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
|
|
62
|
+
"GPTModel",
|
|
63
|
+
kwargs,
|
|
64
|
+
_ALIAS_MAP,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# re-map depricated keywords to re-named positional args
|
|
68
|
+
if api_key is None and "api_key" in alias_values:
|
|
69
|
+
api_key = alias_values["api_key"]
|
|
70
|
+
|
|
231
71
|
model = model or settings.OPENAI_MODEL_NAME
|
|
72
|
+
if model is None:
|
|
73
|
+
model = default_gpt_model
|
|
74
|
+
|
|
232
75
|
cost_per_input_token = (
|
|
233
76
|
cost_per_input_token
|
|
234
77
|
if cost_per_input_token is not None
|
|
@@ -240,67 +83,80 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
240
83
|
else settings.OPENAI_COST_PER_OUTPUT_TOKEN
|
|
241
84
|
)
|
|
242
85
|
|
|
243
|
-
if
|
|
244
|
-
model_name = parse_model_name(model)
|
|
245
|
-
if model_name not in valid_gpt_models:
|
|
246
|
-
raise ValueError(
|
|
247
|
-
f"Invalid model. Available GPT models: {', '.join(model for model in valid_gpt_models)}"
|
|
248
|
-
)
|
|
249
|
-
elif model is None:
|
|
250
|
-
model_name = default_gpt_model
|
|
251
|
-
|
|
252
|
-
if model_name not in model_pricing:
|
|
253
|
-
if cost_per_input_token is None or cost_per_output_token is None:
|
|
254
|
-
raise ValueError(
|
|
255
|
-
f"No pricing available for `{model_name}`. "
|
|
256
|
-
"Please provide both `cost_per_input_token` and `cost_per_output_token` when initializing `GPTModel`, "
|
|
257
|
-
"or set them via the CLI:\n"
|
|
258
|
-
" deepeval set-openai --model=[...] --cost_per_input_token=[...] --cost_per_output_token=[...]"
|
|
259
|
-
)
|
|
260
|
-
else:
|
|
261
|
-
model_pricing[model_name] = {
|
|
262
|
-
"input": float(cost_per_input_token),
|
|
263
|
-
"output": float(cost_per_output_token),
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
elif model is None:
|
|
267
|
-
model_name = default_gpt_model
|
|
268
|
-
|
|
269
|
-
if _openai_api_key is not None:
|
|
86
|
+
if api_key is not None:
|
|
270
87
|
# keep it secret, keep it safe from serializings, logging and alike
|
|
271
|
-
self.
|
|
88
|
+
self.api_key: Optional[SecretStr] = SecretStr(api_key)
|
|
272
89
|
else:
|
|
273
|
-
self.
|
|
90
|
+
self.api_key = settings.OPENAI_API_KEY
|
|
274
91
|
|
|
275
|
-
self.base_url =
|
|
92
|
+
self.base_url = (
|
|
93
|
+
str(base_url).rstrip("/") if base_url is not None else None
|
|
94
|
+
)
|
|
276
95
|
# args and kwargs will be passed to the underlying model, in load_model function
|
|
277
96
|
|
|
278
|
-
|
|
279
|
-
|
|
97
|
+
if temperature is not None:
|
|
98
|
+
temperature = float(temperature)
|
|
99
|
+
elif settings.TEMPERATURE is not None:
|
|
100
|
+
temperature = settings.TEMPERATURE
|
|
101
|
+
else:
|
|
102
|
+
temperature = 0.0
|
|
103
|
+
|
|
104
|
+
if isinstance(model, str):
|
|
105
|
+
model = parse_model_name(model)
|
|
106
|
+
|
|
107
|
+
self.model_data = OPENAI_MODELS_DATA.get(model)
|
|
108
|
+
|
|
109
|
+
# Auto-adjust temperature for known models that require it
|
|
110
|
+
if self.model_data.supports_temperature is False:
|
|
280
111
|
temperature = 1
|
|
281
112
|
|
|
113
|
+
# validation
|
|
114
|
+
cost_per_input_token, cost_per_output_token = require_costs(
|
|
115
|
+
self.model_data,
|
|
116
|
+
model,
|
|
117
|
+
"OPENAI_COST_PER_INPUT_TOKEN",
|
|
118
|
+
"OPENAI_COST_PER_OUTPUT_TOKEN",
|
|
119
|
+
cost_per_input_token,
|
|
120
|
+
cost_per_output_token,
|
|
121
|
+
)
|
|
122
|
+
self.model_data.input_price = cost_per_input_token
|
|
123
|
+
self.model_data.output_price = cost_per_output_token
|
|
124
|
+
|
|
282
125
|
if temperature < 0:
|
|
283
|
-
raise
|
|
126
|
+
raise DeepEvalError("Temperature must be >= 0.")
|
|
127
|
+
|
|
284
128
|
self.temperature = temperature
|
|
285
|
-
|
|
286
|
-
self.
|
|
287
|
-
|
|
129
|
+
# Keep sanitized kwargs for client call to strip legacy keys
|
|
130
|
+
self.kwargs = normalized_kwargs
|
|
131
|
+
self.kwargs.pop("temperature", None)
|
|
132
|
+
|
|
133
|
+
self.generation_kwargs = dict(generation_kwargs or {})
|
|
134
|
+
self.generation_kwargs.pop("temperature", None)
|
|
288
135
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
136
|
+
super().__init__(model)
|
|
137
|
+
|
|
138
|
+
######################
|
|
139
|
+
# Generate functions #
|
|
140
|
+
######################
|
|
292
141
|
|
|
293
142
|
@retry_openai
|
|
294
143
|
def generate(
|
|
295
144
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
296
|
-
) -> Tuple[Union[str,
|
|
145
|
+
) -> Tuple[Union[str, BaseModel], float]:
|
|
297
146
|
client = self.load_model(async_mode=False)
|
|
147
|
+
|
|
148
|
+
if check_if_multimodal(prompt):
|
|
149
|
+
prompt = convert_to_multi_modal_array(input=prompt)
|
|
150
|
+
content = self.generate_content(prompt)
|
|
151
|
+
else:
|
|
152
|
+
content = [{"type": "text", "text": prompt}]
|
|
153
|
+
|
|
298
154
|
if schema:
|
|
299
|
-
if self.
|
|
155
|
+
if self.supports_structured_outputs() is True:
|
|
300
156
|
completion = client.beta.chat.completions.parse(
|
|
301
|
-
model=self.
|
|
157
|
+
model=self.name,
|
|
302
158
|
messages=[
|
|
303
|
-
{"role": "user", "content":
|
|
159
|
+
{"role": "user", "content": content},
|
|
304
160
|
],
|
|
305
161
|
response_format=schema,
|
|
306
162
|
temperature=self.temperature,
|
|
@@ -314,11 +170,11 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
314
170
|
completion.usage.completion_tokens,
|
|
315
171
|
)
|
|
316
172
|
return structured_output, cost
|
|
317
|
-
if self.
|
|
173
|
+
if self.supports_json_mode() is True:
|
|
318
174
|
completion = client.beta.chat.completions.parse(
|
|
319
|
-
model=self.
|
|
175
|
+
model=self.name,
|
|
320
176
|
messages=[
|
|
321
|
-
{"role": "user", "content":
|
|
177
|
+
{"role": "user", "content": content},
|
|
322
178
|
],
|
|
323
179
|
response_format={"type": "json_object"},
|
|
324
180
|
temperature=self.temperature,
|
|
@@ -334,8 +190,8 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
334
190
|
return schema.model_validate(json_output), cost
|
|
335
191
|
|
|
336
192
|
completion = client.chat.completions.create(
|
|
337
|
-
model=self.
|
|
338
|
-
messages=[{"role": "user", "content":
|
|
193
|
+
model=self.name,
|
|
194
|
+
messages=[{"role": "user", "content": content}],
|
|
339
195
|
temperature=self.temperature,
|
|
340
196
|
**self.generation_kwargs,
|
|
341
197
|
)
|
|
@@ -354,12 +210,19 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
354
210
|
self, prompt: str, schema: Optional[BaseModel] = None
|
|
355
211
|
) -> Tuple[Union[str, BaseModel], float]:
|
|
356
212
|
client = self.load_model(async_mode=True)
|
|
213
|
+
|
|
214
|
+
if check_if_multimodal(prompt):
|
|
215
|
+
prompt = convert_to_multi_modal_array(input=prompt)
|
|
216
|
+
content = self.generate_content(prompt)
|
|
217
|
+
else:
|
|
218
|
+
content = [{"type": "text", "text": prompt}]
|
|
219
|
+
|
|
357
220
|
if schema:
|
|
358
|
-
if self.
|
|
221
|
+
if self.supports_structured_outputs() is True:
|
|
359
222
|
completion = await client.beta.chat.completions.parse(
|
|
360
|
-
model=self.
|
|
223
|
+
model=self.name,
|
|
361
224
|
messages=[
|
|
362
|
-
{"role": "user", "content":
|
|
225
|
+
{"role": "user", "content": content},
|
|
363
226
|
],
|
|
364
227
|
response_format=schema,
|
|
365
228
|
temperature=self.temperature,
|
|
@@ -373,11 +236,11 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
373
236
|
completion.usage.completion_tokens,
|
|
374
237
|
)
|
|
375
238
|
return structured_output, cost
|
|
376
|
-
if self.
|
|
239
|
+
if self.supports_json_mode() is True:
|
|
377
240
|
completion = await client.beta.chat.completions.parse(
|
|
378
|
-
model=self.
|
|
241
|
+
model=self.name,
|
|
379
242
|
messages=[
|
|
380
|
-
{"role": "user", "content":
|
|
243
|
+
{"role": "user", "content": content},
|
|
381
244
|
],
|
|
382
245
|
response_format={"type": "json_object"},
|
|
383
246
|
temperature=self.temperature,
|
|
@@ -393,8 +256,8 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
393
256
|
return schema.model_validate(json_output), cost
|
|
394
257
|
|
|
395
258
|
completion = await client.chat.completions.create(
|
|
396
|
-
model=self.
|
|
397
|
-
messages=[{"role": "user", "content":
|
|
259
|
+
model=self.name,
|
|
260
|
+
messages=[{"role": "user", "content": content}],
|
|
398
261
|
temperature=self.temperature,
|
|
399
262
|
**self.generation_kwargs,
|
|
400
263
|
)
|
|
@@ -408,9 +271,9 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
408
271
|
else:
|
|
409
272
|
return output, cost
|
|
410
273
|
|
|
411
|
-
|
|
412
|
-
# Other generate functions
|
|
413
|
-
|
|
274
|
+
############################
|
|
275
|
+
# Other generate functions #
|
|
276
|
+
############################
|
|
414
277
|
|
|
415
278
|
@retry_openai
|
|
416
279
|
def generate_raw_response(
|
|
@@ -419,10 +282,26 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
419
282
|
top_logprobs: int = 5,
|
|
420
283
|
) -> Tuple[ChatCompletion, float]:
|
|
421
284
|
# Generate completion
|
|
285
|
+
model_name = self.name
|
|
286
|
+
is_multimodal = check_if_multimodal(prompt)
|
|
287
|
+
|
|
288
|
+
# validate that this model supports logprobs
|
|
289
|
+
if self.supports_log_probs() is False:
|
|
290
|
+
raise DeepEvalError(
|
|
291
|
+
f"Model `{model_name}` does not support `logprobs` / `top_logprobs`. "
|
|
292
|
+
"Please use a different OpenAI model (for example `gpt-4.1` or `gpt-4o`) "
|
|
293
|
+
"when calling `generate_raw_response`."
|
|
294
|
+
)
|
|
295
|
+
|
|
422
296
|
client = self.load_model(async_mode=False)
|
|
297
|
+
if is_multimodal:
|
|
298
|
+
prompt = convert_to_multi_modal_array(input=prompt)
|
|
299
|
+
content = self.generate_content(prompt)
|
|
300
|
+
else:
|
|
301
|
+
content = [{"type": "text", "text": prompt}]
|
|
423
302
|
completion = client.chat.completions.create(
|
|
424
|
-
model=self.
|
|
425
|
-
messages=[{"role": "user", "content":
|
|
303
|
+
model=self.name,
|
|
304
|
+
messages=[{"role": "user", "content": content}],
|
|
426
305
|
temperature=self.temperature,
|
|
427
306
|
logprobs=True,
|
|
428
307
|
top_logprobs=top_logprobs,
|
|
@@ -442,10 +321,26 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
442
321
|
top_logprobs: int = 5,
|
|
443
322
|
) -> Tuple[ChatCompletion, float]:
|
|
444
323
|
# Generate completion
|
|
324
|
+
model_name = self.name
|
|
325
|
+
is_multimodal = check_if_multimodal(prompt)
|
|
326
|
+
|
|
327
|
+
# validate that this model supports logprobs
|
|
328
|
+
if self.supports_log_probs() is False:
|
|
329
|
+
raise DeepEvalError(
|
|
330
|
+
f"Model `{model_name}` does not support `logprobs` / `top_logprobs`. "
|
|
331
|
+
"Please use a different OpenAI model (for example `gpt-4.1` or `gpt-4o`) "
|
|
332
|
+
"when calling `a_generate_raw_response`."
|
|
333
|
+
)
|
|
334
|
+
|
|
445
335
|
client = self.load_model(async_mode=True)
|
|
336
|
+
if is_multimodal:
|
|
337
|
+
prompt = convert_to_multi_modal_array(input=prompt)
|
|
338
|
+
content = self.generate_content(prompt)
|
|
339
|
+
else:
|
|
340
|
+
content = [{"type": "text", "text": prompt}]
|
|
446
341
|
completion = await client.chat.completions.create(
|
|
447
|
-
model=self.
|
|
448
|
-
messages=[{"role": "user", "content":
|
|
342
|
+
model=self.name,
|
|
343
|
+
messages=[{"role": "user", "content": content}],
|
|
449
344
|
temperature=self.temperature,
|
|
450
345
|
logprobs=True,
|
|
451
346
|
top_logprobs=top_logprobs,
|
|
@@ -461,11 +356,16 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
461
356
|
@retry_openai
|
|
462
357
|
def generate_samples(
|
|
463
358
|
self, prompt: str, n: int, temperature: float
|
|
464
|
-
) ->
|
|
359
|
+
) -> list[str]:
|
|
465
360
|
client = self.load_model(async_mode=False)
|
|
361
|
+
if check_if_multimodal(prompt):
|
|
362
|
+
prompt = convert_to_multi_modal_array(input=prompt)
|
|
363
|
+
content = self.generate_content(prompt)
|
|
364
|
+
else:
|
|
365
|
+
content = [{"type": "text", "text": prompt}]
|
|
466
366
|
response = client.chat.completions.create(
|
|
467
|
-
model=self.
|
|
468
|
-
messages=[{"role": "user", "content":
|
|
367
|
+
model=self.name,
|
|
368
|
+
messages=[{"role": "user", "content": content}],
|
|
469
369
|
n=n,
|
|
470
370
|
temperature=temperature,
|
|
471
371
|
**self.generation_kwargs,
|
|
@@ -473,23 +373,73 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
473
373
|
completions = [choice.message.content for choice in response.choices]
|
|
474
374
|
return completions
|
|
475
375
|
|
|
476
|
-
|
|
477
|
-
# Utilities
|
|
478
|
-
|
|
376
|
+
#############
|
|
377
|
+
# Utilities #
|
|
378
|
+
#############
|
|
479
379
|
|
|
480
380
|
def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
input_cost = input_tokens * pricing["input"]
|
|
484
|
-
output_cost = output_tokens * pricing["output"]
|
|
381
|
+
input_cost = input_tokens * self.model_data.input_price
|
|
382
|
+
output_cost = output_tokens * self.model_data.output_price
|
|
485
383
|
return input_cost + output_cost
|
|
486
384
|
|
|
385
|
+
#########################
|
|
386
|
+
# Capabilities #
|
|
387
|
+
#########################
|
|
388
|
+
|
|
389
|
+
def supports_log_probs(self) -> Union[bool, None]:
|
|
390
|
+
return self.model_data.supports_log_probs
|
|
391
|
+
|
|
392
|
+
def supports_temperature(self) -> Union[bool, None]:
|
|
393
|
+
return self.model_data.supports_temperature
|
|
394
|
+
|
|
395
|
+
def supports_multimodal(self) -> Union[bool, None]:
|
|
396
|
+
return self.model_data.supports_multimodal
|
|
397
|
+
|
|
398
|
+
def supports_structured_outputs(self) -> Union[bool, None]:
|
|
399
|
+
"""
|
|
400
|
+
OpenAI models that natively enforce typed structured outputs.
|
|
401
|
+
Used by generate(...) when a schema is provided.
|
|
402
|
+
"""
|
|
403
|
+
return self.model_data.supports_structured_outputs
|
|
404
|
+
|
|
405
|
+
def supports_json_mode(self) -> Union[bool, None]:
|
|
406
|
+
"""
|
|
407
|
+
OpenAI models that enforce JSON mode
|
|
408
|
+
"""
|
|
409
|
+
return self.model_data.supports_json
|
|
410
|
+
|
|
487
411
|
#########
|
|
488
412
|
# Model #
|
|
489
413
|
#########
|
|
490
414
|
|
|
491
|
-
def
|
|
492
|
-
|
|
415
|
+
def generate_content(
|
|
416
|
+
self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
|
|
417
|
+
):
|
|
418
|
+
multimodal_input = [] if multimodal_input is None else multimodal_input
|
|
419
|
+
content = []
|
|
420
|
+
for element in multimodal_input:
|
|
421
|
+
if isinstance(element, str):
|
|
422
|
+
content.append({"type": "text", "text": element})
|
|
423
|
+
elif isinstance(element, MLLMImage):
|
|
424
|
+
if element.url and not element.local:
|
|
425
|
+
content.append(
|
|
426
|
+
{
|
|
427
|
+
"type": "image_url",
|
|
428
|
+
"image_url": {"url": element.url},
|
|
429
|
+
}
|
|
430
|
+
)
|
|
431
|
+
else:
|
|
432
|
+
element.ensure_images_loaded()
|
|
433
|
+
data_uri = (
|
|
434
|
+
f"data:{element.mimeType};base64,{element.dataBase64}"
|
|
435
|
+
)
|
|
436
|
+
content.append(
|
|
437
|
+
{
|
|
438
|
+
"type": "image_url",
|
|
439
|
+
"image_url": {"url": data_uri},
|
|
440
|
+
}
|
|
441
|
+
)
|
|
442
|
+
return content
|
|
493
443
|
|
|
494
444
|
def load_model(self, async_mode: bool = False):
|
|
495
445
|
if not async_mode:
|
|
@@ -512,10 +462,10 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
512
462
|
|
|
513
463
|
def _build_client(self, cls):
|
|
514
464
|
api_key = require_secret_api_key(
|
|
515
|
-
self.
|
|
465
|
+
self.api_key,
|
|
516
466
|
provider_label="OpenAI",
|
|
517
467
|
env_var_name="OPENAI_API_KEY",
|
|
518
|
-
param_hint="`
|
|
468
|
+
param_hint="`api_key` to GPTModel(...)",
|
|
519
469
|
)
|
|
520
470
|
|
|
521
471
|
kw = dict(
|
|
@@ -531,3 +481,6 @@ class GPTModel(DeepEvalBaseLLM):
|
|
|
531
481
|
kw.pop("max_retries", None)
|
|
532
482
|
return cls(**kw)
|
|
533
483
|
raise
|
|
484
|
+
|
|
485
|
+
def get_model_name(self):
|
|
486
|
+
return f"{self.name}"
|