deepeval 3.7.4__py3-none-any.whl → 3.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +35 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +139 -2
- deepeval/evaluate/evaluate.py +16 -11
- deepeval/evaluate/execute.py +13 -181
- deepeval/evaluate/utils.py +6 -26
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/key_handler.py +3 -0
- deepeval/metrics/__init__.py +14 -16
- deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
- deepeval/metrics/answer_relevancy/template.py +22 -3
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +13 -44
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +16 -2
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +138 -149
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/g_eval.py +87 -78
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +7 -10
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +68 -38
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
- deepeval/metrics/mcp/template.py +52 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -19
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
- deepeval/metrics/tool_use/tool_use.py +42 -66
- deepeval/metrics/topic_adherence/template.py +13 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +158 -122
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +49 -33
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +79 -33
- deepeval/models/embedding_models/local_embedding_model.py +39 -20
- deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
- deepeval/models/embedding_models/openai_embedding_model.py +42 -22
- deepeval/models/llms/amazon_bedrock_model.py +226 -72
- deepeval/models/llms/anthropic_model.py +178 -63
- deepeval/models/llms/azure_model.py +218 -60
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +95 -40
- deepeval/models/llms/gemini_model.py +209 -64
- deepeval/models/llms/grok_model.py +139 -68
- deepeval/models/llms/kimi_model.py +140 -90
- deepeval/models/llms/litellm_model.py +131 -37
- deepeval/models/llms/local_model.py +125 -21
- deepeval/models/llms/ollama_model.py +147 -24
- deepeval/models/llms/openai_model.py +222 -269
- deepeval/models/llms/portkey_model.py +81 -22
- deepeval/models/llms/utils.py +8 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +106 -5
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
- deepeval/{optimization → optimizer}/configs.py +5 -8
- deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/{optimization → optimizer}/utils.py +47 -165
- deepeval/prompt/prompt.py +5 -9
- deepeval/simulator/conversation_simulator.py +43 -0
- deepeval/simulator/template.py +13 -0
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +26 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +68 -1
- deepeval/test_case/llm_test_case.py +206 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +18 -14
- deepeval/test_run/test_run.py +3 -3
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +65 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -343
- deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval/models/mlllms/ollama_model.py +0 -175
- deepeval/models/mlllms/openai_model.py +0 -309
- deepeval/optimization/__init__.py +0 -13
- deepeval/optimization/adapters/__init__.py +0 -2
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval/optimization/aggregates.py +0 -14
- deepeval/optimization/copro/configs.py +0 -31
- deepeval/optimization/gepa/__init__.py +0 -7
- deepeval/optimization/gepa/configs.py +0 -115
- deepeval/optimization/miprov2/configs.py +0 -134
- deepeval/optimization/miprov2/loop.py +0 -785
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval/optimization/policies/__init__.py +0 -16
- deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +0 -33
- deepeval/optimization/types.py +0 -361
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
|
@@ -1,309 +0,0 @@
|
|
|
1
|
-
import base64
|
|
2
|
-
from typing import Optional, Tuple, List, Union, Dict
|
|
3
|
-
from openai import OpenAI, AsyncOpenAI
|
|
4
|
-
from openai.types.chat import ParsedChatCompletion
|
|
5
|
-
from pydantic import BaseModel, SecretStr
|
|
6
|
-
from io import BytesIO
|
|
7
|
-
|
|
8
|
-
from deepeval.config.settings import get_settings
|
|
9
|
-
from deepeval.models.llms.openai_model import (
|
|
10
|
-
model_pricing,
|
|
11
|
-
structured_outputs_models,
|
|
12
|
-
_request_timeout_seconds,
|
|
13
|
-
)
|
|
14
|
-
from deepeval.models import DeepEvalBaseMLLM
|
|
15
|
-
from deepeval.models.llms.utils import trim_and_load_json
|
|
16
|
-
from deepeval.test_case import MLLMImage
|
|
17
|
-
from deepeval.models.utils import parse_model_name, require_secret_api_key
|
|
18
|
-
from deepeval.models.retry_policy import (
|
|
19
|
-
create_retry_decorator,
|
|
20
|
-
sdk_retries_for,
|
|
21
|
-
)
|
|
22
|
-
from deepeval.constants import ProviderSlug as PS
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
# Retry decorator built for the OpenAI provider slug; it wraps the request
# methods of the model class defined further down in this module.
# NOTE(review): presumably Tenacity-based, going by the `_client_kwargs`
# docstring in this file; confirm in `deepeval.models.retry_policy`.
retry_openai = create_retry_decorator(PS.OPENAI)
|
|
26
|
-
|
|
27
|
-
# Models for which requesting log-probabilities is not supported.
# Declared first so the full allow-list below can reuse it verbatim.
unsupported_log_probs_multimodal_gpt_models = [
    "o1",
    "o1-preview",
    "o1-2024-12-17",
    "o1-preview-2024-09-12",
    "gpt-4.5-preview-2025-02-27",
    "o4-mini",
]

# Every model name accepted by the multimodal OpenAI wrapper: the GPT-4o and
# GPT-4.1 families followed by the no-logprobs models listed above.
valid_multimodal_gpt_models = [
    "gpt-4o",
    "gpt-4o-2024-05-13",
    "gpt-4o-2024-08-06",
    "gpt-4o-2024-11-20",
    "gpt-4o-mini",
    "gpt-4o-mini-2024-07-18",
    "gpt-4.1",
    "gpt-4.1-mini",
    "gpt-4.1-nano",
    *unsupported_log_probs_multimodal_gpt_models,
]

# Model used when neither the caller nor the settings name one.
default_multimodal_gpt_model = "gpt-4.1"
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
class MultimodalOpenAIModel(DeepEvalBaseMLLM):
    """OpenAI chat-completions wrapper that accepts mixed text/image input.

    Input is a list whose elements are either plain strings or ``MLLMImage``
    objects; it is converted to the ``content`` list of a single user message.
    Every generate method returns a ``(result, cost)`` tuple where ``cost`` is
    computed from the token usage reported on the response.
    """

    def __init__(
        self,
        model: Optional[str] = None,
        _openai_api_key: Optional[str] = None,
        *args,
        **kwargs,
    ):
        """Resolve the model name and API key, then initialise the base class.

        Model name resolution order:
          1. ``model`` when it is a string (must be in
             ``valid_multimodal_gpt_models`` after ``parse_model_name``),
          2. ``settings.OPENAI_MODEL_NAME`` when set,
          3. ``default_multimodal_gpt_model`` when ``model`` is ``None``.

        Args:
            model: Optional model name.
            _openai_api_key: Optional API key; when omitted the key is read
                from ``settings.OPENAI_API_KEY``.
            *args: Stored on ``self.args`` and forwarded to the base class.
            **kwargs: Stored on ``self.kwargs`` and forwarded to the base class.

        Raises:
            ValueError: If ``model`` is a string that is not a supported
                multimodal model.
        """
        settings = get_settings()
        model_name = None
        if isinstance(model, str):
            model_name = parse_model_name(model)
            if model_name not in valid_multimodal_gpt_models:
                raise ValueError(
                    f"Invalid model. Available Multimodal GPT models: "
                    f"{', '.join(valid_multimodal_gpt_models)}"
                )
        elif settings.OPENAI_MODEL_NAME is not None:
            model_name = settings.OPENAI_MODEL_NAME
        elif model is None:
            model_name = default_multimodal_gpt_model

        if _openai_api_key is not None:
            # Wrap the raw key in SecretStr so it stays out of
            # serialization, logging and the like.
            self._openai_api_key: Optional[SecretStr] = SecretStr(
                _openai_api_key
            )
        else:
            self._openai_api_key = settings.OPENAI_API_KEY

        self.args = args
        self.kwargs = kwargs

        super().__init__(model_name, *args, **kwargs)

    ###############################################
    # Generate functions
    ###############################################

    @retry_openai
    def generate(
        self,
        multimodal_input: List[Union[str, MLLMImage]],
        schema: Optional[BaseModel] = None,
    ) -> Tuple[Union[str, BaseModel], float]:
        """Run one synchronous completion.

        Args:
            multimodal_input: Strings and ``MLLMImage`` objects forming the
                user message.
            schema: Optional pydantic model class. When given and the model is
                listed in ``structured_outputs_models`` the SDK's parse
                endpoint is used; otherwise the text reply is loaded as JSON
                and validated against ``schema``.

        Returns:
            ``(output, cost)`` where ``output`` is the reply text, or the
            parsed/validated schema result when ``schema`` is given.
        """
        client = self.load_model(async_mode=False)
        messages = self._user_messages(multimodal_input)

        if schema and self.model_name in structured_outputs_models:
            response = client.beta.chat.completions.parse(
                model=self.model_name,
                messages=messages,
                response_format=schema,
            )
            return self._unpack_parsed(response)

        completion = client.chat.completions.create(
            model=self.model_name,
            messages=messages,
        )
        return self._unpack_completion(completion, schema)

    @retry_openai
    async def a_generate(
        self,
        multimodal_input: List[Union[str, MLLMImage]],
        schema: Optional[BaseModel] = None,
    ) -> Tuple[Union[str, BaseModel], float]:
        """Asynchronous counterpart of :meth:`generate` (same contract)."""
        client = self.load_model(async_mode=True)
        messages = self._user_messages(multimodal_input)

        if schema and self.model_name in structured_outputs_models:
            response = await client.beta.chat.completions.parse(
                model=self.model_name,
                messages=messages,
                response_format=schema,
            )
            return self._unpack_parsed(response)

        completion = await client.chat.completions.create(
            model=self.model_name,
            messages=messages,
        )
        return self._unpack_completion(completion, schema)

    ###############################################
    # Other generate functions
    ###############################################

    @retry_openai
    def generate_raw_response(
        self,
        multimodal_input: List[Union[str, MLLMImage]],
        top_logprobs: int = 5,
    ) -> Tuple[ParsedChatCompletion, float]:
        """Return the raw completion object with log-probabilities enabled.

        Args:
            multimodal_input: Strings and ``MLLMImage`` objects forming the
                user message.
            top_logprobs: Passed through as the request's ``top_logprobs``.

        Returns:
            ``(completion, cost)``.
        """
        client = self._client()
        completion = client.chat.completions.create(
            model=self.model_name,
            messages=self._user_messages(multimodal_input),
            logprobs=True,
            top_logprobs=top_logprobs,
        )
        return completion, self._usage_cost(completion)

    @retry_openai
    async def a_generate_raw_response(
        self,
        multimodal_input: List[Union[str, MLLMImage]],
        top_logprobs: int = 5,
    ) -> Tuple[ParsedChatCompletion, float]:
        """Asynchronous counterpart of :meth:`generate_raw_response`."""
        client = self._client(async_mode=True)
        completion = await client.chat.completions.create(
            model=self.model_name,
            messages=self._user_messages(multimodal_input),
            logprobs=True,
            top_logprobs=top_logprobs,
        )
        return completion, self._usage_cost(completion)

    ###############################################
    # Utilities
    ###############################################

    def _user_messages(
        self, multimodal_input: List[Union[str, MLLMImage]]
    ) -> List[Dict]:
        """Wrap the converted input in a single user-role message."""
        return [
            {"role": "user", "content": self.generate_prompt(multimodal_input)}
        ]

    def _usage_cost(self, response) -> float:
        """Compute the cost from the token usage reported on ``response``."""
        usage = response.usage
        return self.calculate_cost(usage.prompt_tokens, usage.completion_tokens)

    def _unpack_parsed(self, response):
        """Return ``(parsed_object, cost)`` for a parse-endpoint response."""
        total_cost = self._usage_cost(response)
        return response.choices[0].message.parsed, total_cost

    def _unpack_completion(self, completion, schema):
        """Return ``(output, cost)`` for a plain completion.

        When ``schema`` is given the reply text is loaded as JSON and
        validated against it; otherwise the text is returned as is.
        """
        output = completion.choices[0].message.content
        cost = self._usage_cost(completion)
        if schema:
            json_output = trim_and_load_json(output)
            return schema.model_validate(json_output), cost
        return output, cost

    def generate_prompt(
        self, multimodal_input: Optional[List[Union[str, MLLMImage]]] = None
    ):
        """Convert mixed input into a chat message ``content`` list.

        Strings become ``{"type": "text"}`` parts. ``MLLMImage`` objects
        become ``{"type": "image_url"}`` parts: local images are re-encoded as
        a base64 JPEG data URL, non-local ones are passed by URL. Elements of
        any other type are skipped.

        Args:
            multimodal_input: Items to convert; ``None`` (the default) is
                treated as an empty list.

        Returns:
            A new list of content-part dictionaries.
        """
        prompt = []
        # `None` replaces the former mutable `[]` default.
        for ele in multimodal_input or []:
            if isinstance(ele, str):
                prompt.append({"type": "text", "text": ele})
            elif isinstance(ele, MLLMImage):
                if ele.local:
                    import PIL.Image

                    # Context manager closes the underlying file once the
                    # image has been encoded.
                    with PIL.Image.open(ele.url) as image:
                        encoded = self.encode_pil_image(image)
                    url = f"data:image/jpeg;base64,{encoded}"
                else:
                    url = ele.url
                prompt.append({"type": "image_url", "image_url": {"url": url}})
        return prompt

    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
        """Return the request cost from per-token input/output pricing.

        Pricing for ``self.model_name`` is looked up in ``model_pricing``;
        the 'gpt-4.1' entry is used when the model is not listed.
        """
        pricing = model_pricing.get(self.model_name, model_pricing["gpt-4.1"])
        input_cost = input_tokens * pricing["input"]
        output_cost = output_tokens * pricing["output"]
        return input_cost + output_cost

    def encode_pil_image(self, pil_image):
        """Return ``pil_image`` saved as JPEG and base64-encoded (UTF-8 str).

        Images in mode RGBA, LA or P are converted to RGB before saving.
        """
        image_buffer = BytesIO()
        if pil_image.mode in ("RGBA", "LA", "P"):
            pil_image = pil_image.convert("RGB")
        pil_image.save(image_buffer, format="JPEG")
        return base64.b64encode(image_buffer.getvalue()).decode("utf-8")

    ###############################################
    # Model
    ###############################################

    def get_model_name(self):
        """Return the resolved model name."""
        return self.model_name

    def load_model(self, async_mode: bool = False):
        """Build a new ``AsyncOpenAI`` or ``OpenAI`` client."""
        Client = AsyncOpenAI if async_mode else OpenAI
        return self._build_client(Client)

    def _client_kwargs(self) -> Dict:
        """
        If Tenacity is managing retries, force OpenAI SDK retries off to avoid
        double retries. If the user opts into SDK retries for 'openai' via
        DEEPEVAL_SDK_RETRY_PROVIDERS, leave their retry settings as is.
        """
        kwargs: Dict = {}
        if not sdk_retries_for(PS.OPENAI):
            kwargs["max_retries"] = 0

        # The dict is created fresh above, so a timeout is never pre-set and
        # the configured request timeout is always applied.
        kwargs["timeout"] = _request_timeout_seconds()
        return kwargs

    def _build_client(self, cls):
        """Instantiate ``cls`` with the API key and client kwargs.

        ``require_secret_api_key`` resolves the stored key for the client.
        If the constructor raises a ``TypeError`` mentioning ``max_retries``
        the argument is dropped and construction is retried once; any other
        ``TypeError`` propagates.
        """
        api_key = require_secret_api_key(
            self._openai_api_key,
            provider_label="OpenAI",
            env_var_name="OPENAI_API_KEY",
            param_hint="`_openai_api_key` to MultimodalOpenAIModel(...)",
        )

        kw = dict(
            api_key=api_key,
            **self._client_kwargs(),
        )
        try:
            return cls(**kw)
        except TypeError as e:
            # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
            if "max_retries" in str(e):
                kw.pop("max_retries", None)
                return cls(**kw)
            raise

    def _client(self, async_mode: bool = False):
        """Backwards-compat alias of :meth:`load_model` for internal callers."""
        return self.load_model(async_mode=async_mode)
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from deepeval.optimization.prompt_optimizer import PromptOptimizer
|
|
2
|
-
from deepeval.optimization.configs import OptimizerDisplayConfig
|
|
3
|
-
from deepeval.optimization.gepa.loop import (
|
|
4
|
-
GEPARunner as GEPARunner,
|
|
5
|
-
GEPAConfig as GEPAConfig,
|
|
6
|
-
)
|
|
7
|
-
|
|
8
|
-
__all__ = [
|
|
9
|
-
"GEPARunner",
|
|
10
|
-
"GEPAConfig",
|
|
11
|
-
"PromptOptimizer",
|
|
12
|
-
"OptimizerDisplayConfig",
|
|
13
|
-
]
|