deepeval 3.7.4__py3-none-any.whl → 3.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +35 -1
  3. deepeval/dataset/api.py +23 -1
  4. deepeval/dataset/golden.py +139 -2
  5. deepeval/evaluate/evaluate.py +16 -11
  6. deepeval/evaluate/execute.py +13 -181
  7. deepeval/evaluate/utils.py +6 -26
  8. deepeval/integrations/pydantic_ai/agent.py +19 -2
  9. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  10. deepeval/key_handler.py +3 -0
  11. deepeval/metrics/__init__.py +14 -16
  12. deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
  13. deepeval/metrics/answer_relevancy/template.py +22 -3
  14. deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  15. deepeval/metrics/arena_g_eval/template.py +17 -1
  16. deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  17. deepeval/metrics/argument_correctness/template.py +19 -2
  18. deepeval/metrics/base_metric.py +13 -44
  19. deepeval/metrics/bias/bias.py +102 -108
  20. deepeval/metrics/bias/template.py +14 -2
  21. deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
  22. deepeval/metrics/contextual_precision/template.py +115 -66
  23. deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
  24. deepeval/metrics/contextual_recall/template.py +106 -55
  25. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
  26. deepeval/metrics/contextual_relevancy/template.py +87 -58
  27. deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  28. deepeval/metrics/conversation_completeness/template.py +23 -3
  29. deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  30. deepeval/metrics/conversational_dag/nodes.py +66 -123
  31. deepeval/metrics/conversational_dag/templates.py +16 -0
  32. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  33. deepeval/metrics/dag/dag.py +10 -0
  34. deepeval/metrics/dag/nodes.py +63 -126
  35. deepeval/metrics/dag/templates.py +16 -2
  36. deepeval/metrics/exact_match/exact_match.py +9 -1
  37. deepeval/metrics/faithfulness/faithfulness.py +138 -149
  38. deepeval/metrics/faithfulness/schema.py +1 -1
  39. deepeval/metrics/faithfulness/template.py +200 -115
  40. deepeval/metrics/g_eval/g_eval.py +87 -78
  41. deepeval/metrics/g_eval/template.py +18 -1
  42. deepeval/metrics/g_eval/utils.py +7 -6
  43. deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  44. deepeval/metrics/goal_accuracy/template.py +21 -3
  45. deepeval/metrics/hallucination/hallucination.py +60 -75
  46. deepeval/metrics/hallucination/template.py +13 -0
  47. deepeval/metrics/indicator.py +7 -10
  48. deepeval/metrics/json_correctness/json_correctness.py +40 -38
  49. deepeval/metrics/json_correctness/template.py +10 -0
  50. deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  51. deepeval/metrics/knowledge_retention/schema.py +9 -3
  52. deepeval/metrics/knowledge_retention/template.py +12 -0
  53. deepeval/metrics/mcp/mcp_task_completion.py +68 -38
  54. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
  55. deepeval/metrics/mcp/template.py +52 -0
  56. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  57. deepeval/metrics/mcp_use_metric/template.py +12 -0
  58. deepeval/metrics/misuse/misuse.py +77 -97
  59. deepeval/metrics/misuse/template.py +15 -0
  60. deepeval/metrics/multimodal_metrics/__init__.py +0 -19
  61. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
  62. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
  63. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
  64. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
  65. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
  66. deepeval/metrics/non_advice/non_advice.py +79 -105
  67. deepeval/metrics/non_advice/template.py +12 -0
  68. deepeval/metrics/pattern_match/pattern_match.py +12 -4
  69. deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  70. deepeval/metrics/pii_leakage/template.py +14 -0
  71. deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  72. deepeval/metrics/plan_adherence/template.py +11 -0
  73. deepeval/metrics/plan_quality/plan_quality.py +63 -87
  74. deepeval/metrics/plan_quality/template.py +9 -0
  75. deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
  76. deepeval/metrics/prompt_alignment/template.py +12 -0
  77. deepeval/metrics/ragas.py +3 -3
  78. deepeval/metrics/role_adherence/role_adherence.py +48 -71
  79. deepeval/metrics/role_adherence/template.py +14 -0
  80. deepeval/metrics/role_violation/role_violation.py +75 -108
  81. deepeval/metrics/role_violation/template.py +12 -0
  82. deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  83. deepeval/metrics/step_efficiency/template.py +11 -0
  84. deepeval/metrics/summarization/summarization.py +115 -183
  85. deepeval/metrics/summarization/template.py +19 -0
  86. deepeval/metrics/task_completion/task_completion.py +67 -73
  87. deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
  88. deepeval/metrics/tool_use/tool_use.py +42 -66
  89. deepeval/metrics/topic_adherence/template.py +13 -0
  90. deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
  91. deepeval/metrics/toxicity/template.py +13 -0
  92. deepeval/metrics/toxicity/toxicity.py +80 -99
  93. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  94. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  95. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
  96. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  97. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  98. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
  99. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  100. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  101. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
  102. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  103. deepeval/metrics/turn_faithfulness/template.py +218 -0
  104. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
  105. deepeval/metrics/turn_relevancy/template.py +14 -0
  106. deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  107. deepeval/metrics/utils.py +158 -122
  108. deepeval/models/__init__.py +0 -12
  109. deepeval/models/base_model.py +49 -33
  110. deepeval/models/embedding_models/__init__.py +7 -0
  111. deepeval/models/embedding_models/azure_embedding_model.py +79 -33
  112. deepeval/models/embedding_models/local_embedding_model.py +39 -20
  113. deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
  114. deepeval/models/embedding_models/openai_embedding_model.py +42 -22
  115. deepeval/models/llms/amazon_bedrock_model.py +226 -72
  116. deepeval/models/llms/anthropic_model.py +178 -63
  117. deepeval/models/llms/azure_model.py +218 -60
  118. deepeval/models/llms/constants.py +2032 -0
  119. deepeval/models/llms/deepseek_model.py +95 -40
  120. deepeval/models/llms/gemini_model.py +209 -64
  121. deepeval/models/llms/grok_model.py +139 -68
  122. deepeval/models/llms/kimi_model.py +140 -90
  123. deepeval/models/llms/litellm_model.py +131 -37
  124. deepeval/models/llms/local_model.py +125 -21
  125. deepeval/models/llms/ollama_model.py +147 -24
  126. deepeval/models/llms/openai_model.py +222 -269
  127. deepeval/models/llms/portkey_model.py +81 -22
  128. deepeval/models/llms/utils.py +8 -3
  129. deepeval/models/retry_policy.py +17 -14
  130. deepeval/models/utils.py +106 -5
  131. deepeval/optimizer/__init__.py +5 -0
  132. deepeval/optimizer/algorithms/__init__.py +6 -0
  133. deepeval/optimizer/algorithms/base.py +29 -0
  134. deepeval/optimizer/algorithms/configs.py +18 -0
  135. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  136. deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
  137. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  138. deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
  139. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  140. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  141. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  142. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  143. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  144. deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
  145. deepeval/{optimization → optimizer}/configs.py +5 -8
  146. deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
  147. deepeval/optimizer/prompt_optimizer.py +263 -0
  148. deepeval/optimizer/rewriter/__init__.py +5 -0
  149. deepeval/optimizer/rewriter/rewriter.py +124 -0
  150. deepeval/optimizer/rewriter/utils.py +214 -0
  151. deepeval/optimizer/scorer/__init__.py +5 -0
  152. deepeval/optimizer/scorer/base.py +86 -0
  153. deepeval/optimizer/scorer/scorer.py +316 -0
  154. deepeval/optimizer/scorer/utils.py +30 -0
  155. deepeval/optimizer/types.py +148 -0
  156. deepeval/{optimization → optimizer}/utils.py +47 -165
  157. deepeval/prompt/prompt.py +5 -9
  158. deepeval/simulator/conversation_simulator.py +43 -0
  159. deepeval/simulator/template.py +13 -0
  160. deepeval/test_case/__init__.py +1 -3
  161. deepeval/test_case/api.py +26 -45
  162. deepeval/test_case/arena_test_case.py +7 -2
  163. deepeval/test_case/conversational_test_case.py +68 -1
  164. deepeval/test_case/llm_test_case.py +206 -1
  165. deepeval/test_case/utils.py +4 -8
  166. deepeval/test_run/api.py +18 -14
  167. deepeval/test_run/test_run.py +3 -3
  168. deepeval/tracing/patchers.py +9 -4
  169. deepeval/tracing/tracing.py +2 -2
  170. deepeval/utils.py +65 -0
  171. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
  172. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
  173. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  174. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  175. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  176. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  177. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  178. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  179. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  180. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  181. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  182. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  183. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  184. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  185. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  186. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  187. deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  188. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  189. deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  190. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
  191. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  192. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  193. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  194. deepeval/models/mlllms/__init__.py +0 -4
  195. deepeval/models/mlllms/azure_model.py +0 -343
  196. deepeval/models/mlllms/gemini_model.py +0 -313
  197. deepeval/models/mlllms/ollama_model.py +0 -175
  198. deepeval/models/mlllms/openai_model.py +0 -309
  199. deepeval/optimization/__init__.py +0 -13
  200. deepeval/optimization/adapters/__init__.py +0 -2
  201. deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  202. deepeval/optimization/aggregates.py +0 -14
  203. deepeval/optimization/copro/configs.py +0 -31
  204. deepeval/optimization/gepa/__init__.py +0 -7
  205. deepeval/optimization/gepa/configs.py +0 -115
  206. deepeval/optimization/miprov2/configs.py +0 -134
  207. deepeval/optimization/miprov2/loop.py +0 -785
  208. deepeval/optimization/mutations/__init__.py +0 -0
  209. deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  210. deepeval/optimization/policies/__init__.py +0 -16
  211. deepeval/optimization/policies/tie_breaker.py +0 -67
  212. deepeval/optimization/prompt_optimizer.py +0 -462
  213. deepeval/optimization/simba/__init__.py +0 -0
  214. deepeval/optimization/simba/configs.py +0 -33
  215. deepeval/optimization/types.py +0 -361
  216. deepeval/test_case/mllm_test_case.py +0 -170
  217. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  218. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  219. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  220. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  221. /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
  222. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
  223. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
  224. {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
@@ -1,309 +0,0 @@
1
- import base64
2
- from typing import Optional, Tuple, List, Union, Dict
3
- from openai import OpenAI, AsyncOpenAI
4
- from openai.types.chat import ParsedChatCompletion
5
- from pydantic import BaseModel, SecretStr
6
- from io import BytesIO
7
-
8
- from deepeval.config.settings import get_settings
9
- from deepeval.models.llms.openai_model import (
10
- model_pricing,
11
- structured_outputs_models,
12
- _request_timeout_seconds,
13
- )
14
- from deepeval.models import DeepEvalBaseMLLM
15
- from deepeval.models.llms.utils import trim_and_load_json
16
- from deepeval.test_case import MLLMImage
17
- from deepeval.models.utils import parse_model_name, require_secret_api_key
18
- from deepeval.models.retry_policy import (
19
- create_retry_decorator,
20
- sdk_retries_for,
21
- )
22
- from deepeval.constants import ProviderSlug as PS
23
-
24
-
25
- retry_openai = create_retry_decorator(PS.OPENAI)
26
-
27
- valid_multimodal_gpt_models = [
28
- "gpt-4o",
29
- "gpt-4o-2024-05-13",
30
- "gpt-4o-2024-08-06",
31
- "gpt-4o-2024-11-20",
32
- "gpt-4o-mini",
33
- "gpt-4o-mini-2024-07-18",
34
- "gpt-4.1",
35
- "gpt-4.1-mini",
36
- "gpt-4.1-nano",
37
- "o1",
38
- "o1-preview",
39
- "o1-2024-12-17",
40
- "o1-preview-2024-09-12",
41
- "gpt-4.5-preview-2025-02-27",
42
- "o4-mini",
43
- ]
44
-
45
- default_multimodal_gpt_model = "gpt-4.1"
46
-
47
- unsupported_log_probs_multimodal_gpt_models = [
48
- "o1",
49
- "o1-preview",
50
- "o1-2024-12-17",
51
- "o1-preview-2024-09-12",
52
- "gpt-4.5-preview-2025-02-27",
53
- "o4-mini",
54
- ]
55
-
56
-
57
- class MultimodalOpenAIModel(DeepEvalBaseMLLM):
58
- def __init__(
59
- self,
60
- model: Optional[str] = None,
61
- _openai_api_key: Optional[str] = None,
62
- *args,
63
- **kwargs,
64
- ):
65
- settings = get_settings()
66
- model_name = None
67
- if isinstance(model, str):
68
- model_name = parse_model_name(model)
69
- if model_name not in valid_multimodal_gpt_models:
70
- raise ValueError(
71
- f"Invalid model. Available Multimodal GPT models: "
72
- f"{', '.join(model for model in valid_multimodal_gpt_models)}"
73
- )
74
- elif settings.OPENAI_MODEL_NAME is not None:
75
- model_name = settings.OPENAI_MODEL_NAME
76
- elif model is None:
77
- model_name = default_multimodal_gpt_model
78
-
79
- if _openai_api_key is not None:
80
- # keep it secret, keep it safe from serialization, logging and the like
81
- self._openai_api_key: SecretStr | None = SecretStr(_openai_api_key)
82
- else:
83
- self._openai_api_key = settings.OPENAI_API_KEY
84
-
85
- self.args = args
86
- self.kwargs = kwargs
87
-
88
- super().__init__(model_name, *args, **kwargs)
89
-
90
- ###############################################
91
- # Generate functions
92
- ###############################################
93
-
94
- @retry_openai
95
- def generate(
96
- self,
97
- multimodal_input: List[Union[str, MLLMImage]],
98
- schema: Optional[BaseModel] = None,
99
- ) -> Tuple[str, float]:
100
- client = self.load_model(async_mode=False)
101
- prompt = self.generate_prompt(multimodal_input)
102
-
103
- if schema:
104
- if self.model_name in structured_outputs_models:
105
- messages = [{"role": "user", "content": prompt}]
106
- response = client.beta.chat.completions.parse(
107
- model=self.model_name,
108
- messages=messages,
109
- response_format=schema,
110
- )
111
- input_tokens = response.usage.prompt_tokens
112
- output_tokens = response.usage.completion_tokens
113
- total_cost = self.calculate_cost(input_tokens, output_tokens)
114
- generated_text = response.choices[0].message.parsed
115
- return generated_text, total_cost
116
-
117
- completion = client.chat.completions.create(
118
- model=self.model_name,
119
- messages=[{"role": "user", "content": prompt}],
120
- )
121
- output = completion.choices[0].message.content
122
- cost = self.calculate_cost(
123
- completion.usage.prompt_tokens, completion.usage.completion_tokens
124
- )
125
- if schema:
126
- json_output = trim_and_load_json(output)
127
- return schema.model_validate(json_output), cost
128
- else:
129
- return output, cost
130
-
131
- @retry_openai
132
- async def a_generate(
133
- self,
134
- multimodal_input: List[Union[str, MLLMImage]],
135
- schema: Optional[BaseModel] = None,
136
- ) -> Tuple[str, float]:
137
- client = self.load_model(async_mode=True)
138
- prompt = self.generate_prompt(multimodal_input)
139
-
140
- if schema:
141
- if self.model_name in structured_outputs_models:
142
- messages = [{"role": "user", "content": prompt}]
143
- response = await client.beta.chat.completions.parse(
144
- model=self.model_name,
145
- messages=messages,
146
- response_format=schema,
147
- )
148
- input_tokens = response.usage.prompt_tokens
149
- output_tokens = response.usage.completion_tokens
150
- total_cost = self.calculate_cost(input_tokens, output_tokens)
151
- generated_text = response.choices[0].message.parsed
152
- return generated_text, total_cost
153
-
154
- completion = await client.chat.completions.create(
155
- model=self.model_name,
156
- messages=[{"role": "user", "content": prompt}],
157
- )
158
- output = completion.choices[0].message.content
159
- cost = self.calculate_cost(
160
- completion.usage.prompt_tokens, completion.usage.completion_tokens
161
- )
162
- if schema:
163
- json_output = trim_and_load_json(output)
164
- return schema.model_validate(json_output), cost
165
- else:
166
- return output, cost
167
-
168
- ###############################################
169
- # Other generate functions
170
- ###############################################
171
-
172
- @retry_openai
173
- def generate_raw_response(
174
- self,
175
- multimodal_input: List[Union[str, MLLMImage]],
176
- top_logprobs: int = 5,
177
- ) -> Tuple[ParsedChatCompletion, float]:
178
- client = self._client()
179
- prompt = self.generate_prompt(multimodal_input)
180
- messages = [{"role": "user", "content": prompt}]
181
- completion = client.chat.completions.create(
182
- model=self.model_name,
183
- messages=messages,
184
- logprobs=True,
185
- top_logprobs=top_logprobs,
186
- )
187
- # Cost calculation
188
- input_tokens = completion.usage.prompt_tokens
189
- output_tokens = completion.usage.completion_tokens
190
- cost = self.calculate_cost(input_tokens, output_tokens)
191
- return completion, cost
192
-
193
- @retry_openai
194
- async def a_generate_raw_response(
195
- self,
196
- multimodal_input: List[Union[str, MLLMImage]],
197
- top_logprobs: int = 5,
198
- ) -> Tuple[ParsedChatCompletion, float]:
199
- client = self._client(async_mode=True)
200
- prompt = self.generate_prompt(multimodal_input)
201
- messages = [{"role": "user", "content": prompt}]
202
- completion = await client.chat.completions.create(
203
- model=self.model_name,
204
- messages=messages,
205
- logprobs=True,
206
- top_logprobs=top_logprobs,
207
- )
208
- # Cost calculation
209
- input_tokens = completion.usage.prompt_tokens
210
- output_tokens = completion.usage.completion_tokens
211
- cost = self.calculate_cost(input_tokens, output_tokens)
212
- return completion, cost
213
-
214
- ###############################################
215
- # Utilities
216
- ###############################################
217
-
218
- def generate_prompt(
219
- self, multimodal_input: List[Union[str, MLLMImage]] = []
220
- ):
221
- prompt = []
222
- for ele in multimodal_input:
223
- if isinstance(ele, str):
224
- prompt.append({"type": "text", "text": ele})
225
- elif isinstance(ele, MLLMImage):
226
- if ele.local:
227
- import PIL.Image
228
-
229
- image = PIL.Image.open(ele.url)
230
- visual_dict = {
231
- "type": "image_url",
232
- "image_url": {
233
- "url": f"data:image/jpeg;base64,{self.encode_pil_image(image)}"
234
- },
235
- }
236
- else:
237
- visual_dict = {
238
- "type": "image_url",
239
- "image_url": {"url": ele.url},
240
- }
241
- prompt.append(visual_dict)
242
- return prompt
243
-
244
- def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
245
- pricing = model_pricing.get(
246
- self.model_name, model_pricing["gpt-4.1"]
247
- ) # Default to 'gpt-4.1' if model not found
248
- input_cost = input_tokens * pricing["input"]
249
- output_cost = output_tokens * pricing["output"]
250
- return input_cost + output_cost
251
-
252
- def encode_pil_image(self, pil_image):
253
- image_buffer = BytesIO()
254
- if pil_image.mode in ("RGBA", "LA", "P"):
255
- pil_image = pil_image.convert("RGB")
256
- pil_image.save(image_buffer, format="JPEG")
257
- image_bytes = image_buffer.getvalue()
258
- base64_encoded_image = base64.b64encode(image_bytes).decode("utf-8")
259
- return base64_encoded_image
260
-
261
- ###############################################
262
- # Model
263
- ###############################################
264
-
265
- def get_model_name(self):
266
- return self.model_name
267
-
268
- def load_model(self, async_mode: bool = False):
269
- Client = AsyncOpenAI if async_mode else OpenAI
270
- return self._build_client(Client)
271
-
272
- def _client_kwargs(self) -> Dict:
273
- """
274
- If Tenacity is managing retries, force OpenAI SDK retries off to avoid
275
- double retries. If the user opts into SDK retries for 'openai' via
276
- DEEPEVAL_SDK_RETRY_PROVIDERS, leave their retry settings as is.
277
- """
278
- kwargs: Dict = {}
279
- if not sdk_retries_for(PS.OPENAI):
280
- kwargs["max_retries"] = 0
281
-
282
- if not kwargs.get("timeout"):
283
- kwargs["timeout"] = _request_timeout_seconds()
284
- return kwargs
285
-
286
- def _build_client(self, cls):
287
- api_key = require_secret_api_key(
288
- self._openai_api_key,
289
- provider_label="OpenAI",
290
- env_var_name="OPENAI_API_KEY",
291
- param_hint="`_openai_api_key` to MultimodalOpenAIModel(...)",
292
- )
293
-
294
- kw = dict(
295
- api_key=api_key,
296
- **self._client_kwargs(),
297
- )
298
- try:
299
- return cls(**kw)
300
- except TypeError as e:
301
- # older OpenAI SDKs may not accept max_retries, in that case remove and retry once
302
- if "max_retries" in str(e):
303
- kw.pop("max_retries", None)
304
- return cls(**kw)
305
- raise
306
-
307
- def _client(self, async_mode: bool = False):
308
- # Backwards-compat path for internal callers in this module
309
- return self.load_model(async_mode=async_mode)
@@ -1,13 +0,0 @@
1
- from deepeval.optimization.prompt_optimizer import PromptOptimizer
2
- from deepeval.optimization.configs import OptimizerDisplayConfig
3
- from deepeval.optimization.gepa.loop import (
4
- GEPARunner as GEPARunner,
5
- GEPAConfig as GEPAConfig,
6
- )
7
-
8
- __all__ = [
9
- "GEPARunner",
10
- "GEPAConfig",
11
- "PromptOptimizer",
12
- "OptimizerDisplayConfig",
13
- ]
@@ -1,2 +0,0 @@
1
- # nothing yet
2
- __all__ = []