deepeval 3.7.4__py3-none-any.whl → 3.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/config/settings.py +35 -1
- deepeval/dataset/api.py +23 -1
- deepeval/dataset/golden.py +139 -2
- deepeval/evaluate/evaluate.py +16 -11
- deepeval/evaluate/execute.py +13 -181
- deepeval/evaluate/utils.py +6 -26
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/key_handler.py +3 -0
- deepeval/metrics/__init__.py +14 -16
- deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
- deepeval/metrics/answer_relevancy/template.py +22 -3
- deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
- deepeval/metrics/arena_g_eval/template.py +17 -1
- deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
- deepeval/metrics/argument_correctness/template.py +19 -2
- deepeval/metrics/base_metric.py +13 -44
- deepeval/metrics/bias/bias.py +102 -108
- deepeval/metrics/bias/template.py +14 -2
- deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
- deepeval/metrics/conversation_completeness/template.py +23 -3
- deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
- deepeval/metrics/conversational_dag/nodes.py +66 -123
- deepeval/metrics/conversational_dag/templates.py +16 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
- deepeval/metrics/dag/dag.py +10 -0
- deepeval/metrics/dag/nodes.py +63 -126
- deepeval/metrics/dag/templates.py +16 -2
- deepeval/metrics/exact_match/exact_match.py +9 -1
- deepeval/metrics/faithfulness/faithfulness.py +138 -149
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/g_eval.py +87 -78
- deepeval/metrics/g_eval/template.py +18 -1
- deepeval/metrics/g_eval/utils.py +7 -6
- deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
- deepeval/metrics/goal_accuracy/template.py +21 -3
- deepeval/metrics/hallucination/hallucination.py +60 -75
- deepeval/metrics/hallucination/template.py +13 -0
- deepeval/metrics/indicator.py +7 -10
- deepeval/metrics/json_correctness/json_correctness.py +40 -38
- deepeval/metrics/json_correctness/template.py +10 -0
- deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
- deepeval/metrics/knowledge_retention/schema.py +9 -3
- deepeval/metrics/knowledge_retention/template.py +12 -0
- deepeval/metrics/mcp/mcp_task_completion.py +68 -38
- deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
- deepeval/metrics/mcp/template.py +52 -0
- deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
- deepeval/metrics/mcp_use_metric/template.py +12 -0
- deepeval/metrics/misuse/misuse.py +77 -97
- deepeval/metrics/misuse/template.py +15 -0
- deepeval/metrics/multimodal_metrics/__init__.py +0 -19
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
- deepeval/metrics/non_advice/non_advice.py +79 -105
- deepeval/metrics/non_advice/template.py +12 -0
- deepeval/metrics/pattern_match/pattern_match.py +12 -4
- deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
- deepeval/metrics/pii_leakage/template.py +14 -0
- deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
- deepeval/metrics/plan_adherence/template.py +11 -0
- deepeval/metrics/plan_quality/plan_quality.py +63 -87
- deepeval/metrics/plan_quality/template.py +9 -0
- deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
- deepeval/metrics/prompt_alignment/template.py +12 -0
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_adherence/role_adherence.py +48 -71
- deepeval/metrics/role_adherence/template.py +14 -0
- deepeval/metrics/role_violation/role_violation.py +75 -108
- deepeval/metrics/role_violation/template.py +12 -0
- deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
- deepeval/metrics/step_efficiency/template.py +11 -0
- deepeval/metrics/summarization/summarization.py +115 -183
- deepeval/metrics/summarization/template.py +19 -0
- deepeval/metrics/task_completion/task_completion.py +67 -73
- deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
- deepeval/metrics/tool_use/tool_use.py +42 -66
- deepeval/metrics/topic_adherence/template.py +13 -0
- deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
- deepeval/metrics/toxicity/template.py +13 -0
- deepeval/metrics/toxicity/toxicity.py +80 -99
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
- deepeval/metrics/turn_relevancy/template.py +14 -0
- deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
- deepeval/metrics/utils.py +158 -122
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +49 -33
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +79 -33
- deepeval/models/embedding_models/local_embedding_model.py +39 -20
- deepeval/models/embedding_models/ollama_embedding_model.py +52 -19
- deepeval/models/embedding_models/openai_embedding_model.py +42 -22
- deepeval/models/llms/amazon_bedrock_model.py +226 -72
- deepeval/models/llms/anthropic_model.py +178 -63
- deepeval/models/llms/azure_model.py +218 -60
- deepeval/models/llms/constants.py +2032 -0
- deepeval/models/llms/deepseek_model.py +95 -40
- deepeval/models/llms/gemini_model.py +209 -64
- deepeval/models/llms/grok_model.py +139 -68
- deepeval/models/llms/kimi_model.py +140 -90
- deepeval/models/llms/litellm_model.py +131 -37
- deepeval/models/llms/local_model.py +125 -21
- deepeval/models/llms/ollama_model.py +147 -24
- deepeval/models/llms/openai_model.py +222 -269
- deepeval/models/llms/portkey_model.py +81 -22
- deepeval/models/llms/utils.py +8 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +106 -5
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
- deepeval/{optimization → optimizer}/configs.py +5 -8
- deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/{optimization → optimizer}/utils.py +47 -165
- deepeval/prompt/prompt.py +5 -9
- deepeval/simulator/conversation_simulator.py +43 -0
- deepeval/simulator/template.py +13 -0
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +26 -45
- deepeval/test_case/arena_test_case.py +7 -2
- deepeval/test_case/conversational_test_case.py +68 -1
- deepeval/test_case/llm_test_case.py +206 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +18 -14
- deepeval/test_run/test_run.py +3 -3
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +65 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/METADATA +1 -4
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/RECORD +180 -193
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -343
- deepeval/models/mlllms/gemini_model.py +0 -313
- deepeval/models/mlllms/ollama_model.py +0 -175
- deepeval/models/mlllms/openai_model.py +0 -309
- deepeval/optimization/__init__.py +0 -13
- deepeval/optimization/adapters/__init__.py +0 -2
- deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
- deepeval/optimization/aggregates.py +0 -14
- deepeval/optimization/copro/configs.py +0 -31
- deepeval/optimization/gepa/__init__.py +0 -7
- deepeval/optimization/gepa/configs.py +0 -115
- deepeval/optimization/miprov2/configs.py +0 -134
- deepeval/optimization/miprov2/loop.py +0 -785
- deepeval/optimization/mutations/__init__.py +0 -0
- deepeval/optimization/mutations/prompt_rewriter.py +0 -458
- deepeval/optimization/policies/__init__.py +0 -16
- deepeval/optimization/policies/tie_breaker.py +0 -67
- deepeval/optimization/prompt_optimizer.py +0 -462
- deepeval/optimization/simba/__init__.py +0 -0
- deepeval/optimization/simba/configs.py +0 -33
- deepeval/optimization/types.py +0 -361
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/WHEEL +0 -0
- {deepeval-3.7.4.dist-info → deepeval-3.7.6.dist-info}/entry_points.txt +0 -0
deepeval/metrics/turn_relevancy/turn_relevancy.py CHANGED

@@ -11,15 +11,19 @@ from deepeval.metrics.utils import (
     construct_verbose_logs,
     get_turns_in_sliding_window,
     get_unit_interactions,
-    trimAndLoadJson,
     initialize_model,
     convert_turn_to_dict,
+    a_generate_with_schema_and_extract,
+    generate_with_schema_and_extract,
 )
 from deepeval.models import DeepEvalBaseLLM
 from deepeval.metrics.indicator import metric_progress_indicator
 from deepeval.test_case import ConversationalTestCase, Turn, TurnParams
 from deepeval.utils import get_or_create_event_loop, prettify_list
-from deepeval.metrics.turn_relevancy.schema import
+from deepeval.metrics.turn_relevancy.schema import (
+    TurnRelevancyVerdict,
+    TurnRelevancyScoreReason,
+)
 from deepeval.metrics.api import metric_data_manager


@@ -53,7 +57,12 @@ class TurnRelevancyMetric(BaseConversationalMetric):
         _log_metric_to_confident: bool = True,
     ):
         check_conversational_test_case_params(
-            test_case,
+            test_case,
+            self._required_test_case_params,
+            self,
+            False,
+            self.model,
+            test_case.multimodal,
         )

         self.evaluation_cost = 0 if self.using_native_model else None
@@ -108,7 +117,12 @@ class TurnRelevancyMetric(BaseConversationalMetric):
         _log_metric_to_confident: bool = True,
     ) -> float:
         check_conversational_test_case_params(
-            test_case,
+            test_case,
+            self._required_test_case_params,
+            self,
+            False,
+            self.model,
+            test_case.multimodal,
         )

         self.evaluation_cost = 0 if self.using_native_model else None
@@ -148,7 +162,7 @@ class TurnRelevancyMetric(BaseConversationalMetric):
         )
         return self.score

-    async def _a_generate_reason(self) -> str:
+    async def _a_generate_reason(self) -> Optional[str]:
         if self.include_reason is False:
             return None

@@ -162,24 +176,19 @@ class TurnRelevancyMetric(BaseConversationalMetric):
         prompt = TurnRelevancyTemplate.generate_reason(
             score=self.score, irrelevancies=irrelevancies
         )
-        if self.using_native_model:
-            res, cost = await self.model.a_generate(
-                prompt, schema=TurnRelevancyScoreReason
-            )
-            self.evaluation_cost += cost
-            return res.reason
-        else:
-            try:
-                res: TurnRelevancyScoreReason = await self.model.a_generate(
-                    prompt, schema=TurnRelevancyScoreReason
-                )
-                return res.reason
-            except TypeError:
-                res = await self.model.a_generate(prompt)
-                data = trimAndLoadJson(res, self)
-                return data["reason"]

-
+        return await a_generate_with_schema_and_extract(
+            metric=self,
+            prompt=prompt,
+            schema_cls=TurnRelevancyScoreReason,
+            extract_schema=lambda s: s.reason,
+            extract_json=lambda data: data["reason"],
+        )
+
+    def _generate_reason(self) -> Optional[str]:
+        if self.include_reason is False:
+            return None
+
         irrelevancies: List[Dict[str, str]] = []
         for index, verdict in enumerate(self.verdicts):
             if verdict.verdict.strip().lower() == "no":
@@ -190,22 +199,14 @@ class TurnRelevancyMetric(BaseConversationalMetric):
         prompt = TurnRelevancyTemplate.generate_reason(
             score=self.score, irrelevancies=irrelevancies
         )
-
-
-
-
-
-
-
-
-                res: TurnRelevancyScoreReason = self.model.generate(
-                    prompt, schema=TurnRelevancyScoreReason
-                )
-                return res.reason
-            except TypeError:
-                res = self.model.generate(prompt)
-                data = trimAndLoadJson(res, self)
-                return data["reason"]
+
+        return generate_with_schema_and_extract(
+            metric=self,
+            prompt=prompt,
+            schema_cls=TurnRelevancyScoreReason,
+            extract_schema=lambda s: s.reason,
+            extract_json=lambda data: data["reason"],
+        )

     async def _a_generate_verdict(
         self, turns_sliding_window: List[Turn]
@@ -215,22 +216,14 @@ class TurnRelevancyMetric(BaseConversationalMetric):
                 convert_turn_to_dict(turn) for turn in turns_sliding_window
             ]
         )
-
-
-
-
-
-
-
-
-                res: TurnRelevancyVerdict = await self.model.a_generate(
-                    prompt, schema=TurnRelevancyVerdict
-                )
-                return res
-            except TypeError:
-                res = await self.model.a_generate(prompt)
-                data = trimAndLoadJson(res, self)
-                return TurnRelevancyVerdict(**data)
+
+        return await a_generate_with_schema_and_extract(
+            metric=self,
+            prompt=prompt,
+            schema_cls=TurnRelevancyVerdict,
+            extract_schema=lambda s: s,
+            extract_json=lambda data: TurnRelevancyVerdict(**data),
+        )

     def _generate_verdict(
         self, turns_sliding_window: List[Turn]
@@ -240,20 +233,14 @@ class TurnRelevancyMetric(BaseConversationalMetric):
                 convert_turn_to_dict(turn) for turn in turns_sliding_window
             ]
         )
-
-
-self
-
-
-
-
-
-                )
-                return res
-            except TypeError:
-                res = self.model.generate(prompt)
-                data = trimAndLoadJson(res, self)
-                return TurnRelevancyVerdict(**data)
+
+        return generate_with_schema_and_extract(
+            metric=self,
+            prompt=prompt,
+            schema_cls=TurnRelevancyVerdict,
+            extract_schema=lambda s: s,
+            extract_json=lambda data: TurnRelevancyVerdict(**data),
+        )

     def _calculate_score(self) -> float:
         number_of_verdicts = len(self.verdicts)
@@ -274,7 +261,7 @@ class TurnRelevancyMetric(BaseConversationalMetric):
         else:
             try:
                 self.score >= self.threshold
-            except:
+            except TypeError:
                 self.success = False
         return self.success

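For readability, here is a keyword-style sketch of the new check call above, mapping each positional argument to the parameter it binds in the updated `check_conversational_test_case_params` signature (shown in the `deepeval/metrics/utils.py` diff below). The leading `test_case` parameter name is inferred from the call site, and this is an illustrative fragment from inside a metric method, not standalone code:

```python
# Illustrative fragment: "self" is the conversational metric instance.
# Comments name the parameters from the updated signature in deepeval/metrics/utils.py;
# the first parameter name, test_case, is inferred from the call site.
check_conversational_test_case_params(
    test_case,                          # test_case
    self._required_test_case_params,    # test_case_params
    self,                               # metric
    False,                              # require_chatbot_role
    self.model,                         # model
    test_case.multimodal,               # multimodal
)
```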
deepeval/metrics/utils.py CHANGED

@@ -2,16 +2,24 @@ import inspect
 import json
 import re
 import sys
-import
-
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)

 from deepeval.errors import (
     MissingTestCaseParamsError,
-    MismatchedTestCaseInputsError,
 )
+from deepeval.utils import convert_to_multi_modal_array
 from deepeval.models import (
     DeepEvalBaseLLM,
-    DeepEvalBaseMLLM,
     GPTModel,
     AnthropicModel,
     AzureOpenAIModel,
@@ -22,16 +30,20 @@ from deepeval.models import (
     OllamaEmbeddingModel,
     LocalEmbeddingModel,
     GeminiModel,
-    MultimodalOpenAIModel,
-    MultimodalGeminiModel,
-    MultimodalOllamaModel,
-    MultimodalAzureOpenAIMLLMModel,
     AmazonBedrockModel,
     LiteLLMModel,
     KimiModel,
     GrokModel,
     DeepSeekModel,
 )
+from deepeval.models.llms.constants import (
+    OPENAI_MODELS_DATA,
+    GEMINI_MODELS_DATA,
+    OLLAMA_MODELS_DATA,
+    ANTHROPIC_MODELS_DATA,
+    GROK_MODELS_DATA,
+    KIMI_MODELS_DATA,
+)
 from deepeval.key_handler import (
     ModelKeyValues,
     EmbeddingKeyValues,
@@ -40,16 +52,12 @@ from deepeval.key_handler import (
 from deepeval.metrics import (
     BaseMetric,
     BaseConversationalMetric,
-    BaseMultimodalMetric,
     BaseArenaMetric,
 )
 from deepeval.models.base_model import DeepEvalBaseEmbeddingModel
 from deepeval.test_case import (
-    Turn,
     LLMTestCase,
     LLMTestCaseParams,
-    MLLMTestCase,
-    MLLMTestCaseParams,
     ConversationalTestCase,
     MLLMImage,
     Turn,
@@ -58,12 +66,20 @@ from deepeval.test_case import (
     TurnParams,
 )

+MULTIMODAL_SUPPORTED_MODELS = {
+    GPTModel: OPENAI_MODELS_DATA,
+    GeminiModel: GEMINI_MODELS_DATA,
+    OllamaModel: OLLAMA_MODELS_DATA,
+    AzureOpenAIModel: OPENAI_MODELS_DATA,
+    KimiModel: KIMI_MODELS_DATA,
+    AnthropicModel: ANTHROPIC_MODELS_DATA,
+    GrokModel: GROK_MODELS_DATA,
+}
+

 def copy_metrics(
-    metrics: List[
-
-    ],
-) -> List[Union[BaseMetric, BaseMultimodalMetric, BaseConversationalMetric]]:
+    metrics: List[Union[BaseMetric, BaseConversationalMetric]],
+) -> List[Union[BaseMetric, BaseConversationalMetric]]:
     copied_metrics = []
     for metric in metrics:
         metric_class = type(metric)
@@ -201,7 +217,28 @@ def check_conversational_test_case_params(
     test_case_params: List[TurnParams],
     metric: BaseConversationalMetric,
     require_chatbot_role: bool = False,
+    model: Optional[DeepEvalBaseLLM] = None,
+    multimodal: Optional[bool] = False,
 ):
+    if multimodal:
+        if not model or not model.supports_multimodal():
+            if model and type(model) in MULTIMODAL_SUPPORTED_MODELS.keys():
+                valid_multimodal_models = []
+                for model_name, model_data in MULTIMODAL_SUPPORTED_MODELS.get(
+                    type(model)
+                ).items():
+                    if callable(model_data):
+                        model_data = model_data()
+                    if model_data.supports_multimodal:
+                        valid_multimodal_models.append(model_name)
+                raise ValueError(
+                    f"The evaluation model {model.name} does not support multimodal evaluations at the moment. Available multi-modal models for the {model.__class__.__name__} provider includes {', '.join(valid_multimodal_models)}."
+                )
+            else:
+                raise ValueError(
+                    f"The evaluation model {model.name} does not support multimodal inputs, please use one of the following evaluation models: {', '.join([cls.__name__ for cls in MULTIMODAL_SUPPORTED_MODELS.keys()])}"
+                )
+
     if isinstance(test_case, ConversationalTestCase) is False:
         error_str = f"Unable to evaluate test cases that are not of type 'ConversationalTestCase' using the conversational '{metric.__name__}' metric."
         metric.error = error_str
@@ -234,8 +271,49 @@
 def check_llm_test_case_params(
     test_case: LLMTestCase,
     test_case_params: List[LLMTestCaseParams],
+    input_image_count: Optional[int],
+    actual_output_image_count: Optional[int],
     metric: Union[BaseMetric, BaseArenaMetric],
+    model: Optional[DeepEvalBaseLLM] = None,
+    multimodal: Optional[bool] = False,
 ):
+    if multimodal:
+        if not model or not model.supports_multimodal():
+            if model and type(model) in MULTIMODAL_SUPPORTED_MODELS.keys():
+                valid_multimodal_models = []
+                for model_name, model_data in MULTIMODAL_SUPPORTED_MODELS.get(
+                    type(model)
+                ).items():
+                    if callable(model_data):
+                        model_data = model_data()
+                    if model_data.supports_multimodal:
+                        valid_multimodal_models.append(model_name)
+                raise ValueError(
+                    f"The evaluation model {model.name} does not support multimodal evaluations at the moment. Available multi-modal models for the {model.__class__.__name__} provider includes {', '.join(valid_multimodal_models)}."
+                )
+            else:
+                raise ValueError(
+                    f"The evaluation model {model.name} does not support multimodal inputs, please use one of the following evaluation models: {', '.join([cls.__name__ for cls in MULTIMODAL_SUPPORTED_MODELS.keys()])}"
+                )
+
+    if input_image_count:
+        count = 0
+        for ele in convert_to_multi_modal_array(test_case.input):
+            if isinstance(ele, MLLMImage):
+                count += 1
+        if count != input_image_count:
+            error_str = f"Can only evaluate test cases with '{input_image_count}' input images using the '{metric.__name__}' metric. `{count}` found."
+            raise ValueError(error_str)
+
+    if actual_output_image_count:
+        count = 0
+        for ele in convert_to_multi_modal_array(test_case.actual_output):
+            if isinstance(ele, MLLMImage):
+                count += 1
+        if count != actual_output_image_count:
+            error_str = f"Unable to evaluate test cases with '{actual_output_image_count}' output images using the '{metric.__name__}' metric. `{count}` found."
+            raise ValueError(error_str)
+
     if isinstance(test_case, LLMTestCase) is False:
         error_str = f"Unable to evaluate test cases that are not of type 'LLMTestCase' using the non-conversational '{metric.__name__}' metric."
         metric.error = error_str
@@ -265,6 +343,8 @@ def check_arena_test_case_params(
     arena_test_case: ArenaTestCase,
     test_case_params: List[LLMTestCaseParams],
     metric: BaseArenaMetric,
+    model: Optional[DeepEvalBaseLLM] = None,
+    multimodal: Optional[bool] = False,
 ):
     if not isinstance(arena_test_case, ArenaTestCase):
         raise ValueError(
@@ -285,73 +365,8 @@
         )

     for test_case in cases:
-        check_llm_test_case_params(
-
-
-def check_mllm_test_case_params(
-    test_case: MLLMTestCase,
-    test_case_params: List[MLLMTestCaseParams],
-    input_image_count: Optional[int],
-    actual_output_image_count: Optional[int],
-    metric: BaseMetric,
-):
-    if input_image_count:
-        count = 0
-        for ele in test_case.input:
-            if isinstance(ele, MLLMImage):
-                count += 1
-        if count != input_image_count:
-            error_str = f"Can only evaluate test cases with '{input_image_count}' input images using the '{metric.__name__}' metric. `{count}` found."
-            raise ValueError(error_str)
-
-    if actual_output_image_count:
-        count = 0
-        for ele in test_case.actual_output:
-            if isinstance(ele, MLLMImage):
-                count += 1
-        if count != actual_output_image_count:
-            error_str = f"Unable to evaluate test cases with '{actual_output_image_count}' output images using the '{metric.__name__}' metric. `{count}` found."
-            raise ValueError(error_str)
-
-    if isinstance(test_case, MLLMTestCase) is False:
-        error_str = f"Unable to evaluate test cases that are not of type 'MLLMTestCase' using the '{metric.__name__}' metric."
-        metric.error = error_str
-        raise ValueError(error_str)
-
-    missing_params = []
-    for param in test_case_params:
-        if getattr(test_case, param.value) is None:
-            missing_params.append(f"'{param.value}'")
-
-    if missing_params:
-        if len(missing_params) == 1:
-            missing_params_str = missing_params[0]
-        elif len(missing_params) == 2:
-            missing_params_str = " and ".join(missing_params)
-        else:
-            missing_params_str = (
-                ", ".join(missing_params[:-1]) + ", and " + missing_params[-1]
-            )
-
-        error_str = f"{missing_params_str} cannot be None for the '{metric.__name__}' metric"
-        metric.error = error_str
-        raise MissingTestCaseParamsError(error_str)
-
-
-def check_mllm_test_cases_params(
-    test_cases: List[MLLMTestCase],
-    test_case_params: List[MLLMTestCaseParams],
-    input_image_count: Optional[int],
-    actual_output_image_count: Optional[int],
-    metric: BaseMetric,
-):
-    for test_case in test_cases:
-        check_mllm_test_case_params(
-            test_case,
-            test_case_params,
-            input_image_count,
-            actual_output_image_count,
-            metric,
+        check_llm_test_case_params(
+            test_case, test_case_params, None, None, metric, model, multimodal
         )

@@ -381,6 +396,63 @@ def trimAndLoadJson(
         raise Exception(f"An unexpected error occurred: {str(e)}")


+SchemaType = TypeVar("SchemaType")
+ReturnType = TypeVar("ReturnType")
+
+
+def generate_with_schema_and_extract(
+    metric: Union[BaseMetric, BaseArenaMetric, BaseConversationalMetric],
+    prompt: Any,
+    schema_cls: Type[SchemaType],
+    *,
+    extract_schema: Callable[[SchemaType], ReturnType],
+    extract_json: Callable[[Dict[str, Any]], ReturnType],
+) -> ReturnType:
+    """
+    Synchronous wrapper:
+    - calls model.generate_with_schema(...)
+    - accrues cost if applicable
+    - if schema instance -> extract_schema
+      else parse JSON -> extract_json
+    """
+    if metric.using_native_model:
+        result, cost = metric.model.generate_with_schema(
+            prompt, schema=schema_cls
+        )
+        metric._accrue_cost(cost)
+    else:
+        result = metric.model.generate_with_schema(prompt, schema=schema_cls)
+    if isinstance(result, schema_cls):
+        return extract_schema(result)
+    data = trimAndLoadJson(result, metric)
+    return extract_json(data)
+
+
+async def a_generate_with_schema_and_extract(
+    metric: Union[BaseMetric, BaseArenaMetric, BaseConversationalMetric],
+    prompt: Any,
+    schema_cls: Type[SchemaType],
+    *,
+    extract_schema: Callable[[SchemaType], ReturnType],
+    extract_json: Callable[[Dict[str, Any]], ReturnType],
+) -> ReturnType:
+    if metric.using_native_model:
+        result, cost = await metric.model.a_generate_with_schema(
+            prompt, schema=schema_cls
+        )
+        metric._accrue_cost(cost)
+    else:
+        result = await metric.model.a_generate_with_schema(
+            prompt, schema=schema_cls
+        )
+
+    if isinstance(result, schema_cls):
+        return extract_schema(result)
+
+    data = trimAndLoadJson(result, metric)
+    return extract_json(data)
+
+
 ###############################################
 # Default Model Providers
 ###############################################
@@ -397,8 +469,8 @@ def should_use_local_model():


 def should_use_ollama_model():
-
-    return
+    value = KEY_FILE_HANDLER.fetch_data(ModelKeyValues.LOCAL_MODEL_API_KEY)
+    return value == "ollama"


 def should_use_gemini_model():
@@ -459,7 +531,7 @@ def initialize_model(
     elif should_use_local_model():
         return LocalModel(), True
     elif should_use_azure_openai():
-        return AzureOpenAIModel(
+        return AzureOpenAIModel(model=model), True
     elif should_use_moonshot_model():
         return KimiModel(model=model), True
     elif should_use_grok_model():
@@ -501,42 +573,6 @@ def is_native_model(
 ###############################################


-def initialize_multimodal_model(
-    model: Optional[Union[str, DeepEvalBaseMLLM]] = None,
-) -> Tuple[DeepEvalBaseLLM, bool]:
-    """
-    Returns a tuple of (initialized DeepEvalBaseMLLM, using_native_model boolean)
-    """
-    if is_native_mllm(model):
-        return model, True
-    if isinstance(model, DeepEvalBaseMLLM):
-        return model, False
-    if should_use_gemini_model():
-        return MultimodalGeminiModel(), True
-    if should_use_ollama_model():
-        return MultimodalOllamaModel(), True
-    elif should_use_azure_openai():
-        return MultimodalAzureOpenAIMLLMModel(model_name=model), True
-    elif isinstance(model, str) or model is None:
-        return MultimodalOpenAIModel(model=model), True
-    raise TypeError(
-        f"Unsupported type for model: {type(model)}. Expected None, str, DeepEvalBaseMLLM, MultimodalOpenAIModel, MultimodalOllamaModel."
-    )
-
-
-def is_native_mllm(
-    model: Optional[Union[str, DeepEvalBaseLLM]] = None,
-) -> bool:
-    if (
-        isinstance(model, MultimodalOpenAIModel)
-        or isinstance(model, MultimodalOllamaModel)
-        or isinstance(model, MultimodalGeminiModel)
-    ):
-        return True
-    else:
-        return False
-
-
 ###############################################
 # Embedding Model
 ###############################################
deepeval/models/__init__.py CHANGED

@@ -1,7 +1,6 @@
 from deepeval.models.base_model import (
     DeepEvalBaseModel,
     DeepEvalBaseLLM,
-    DeepEvalBaseMLLM,
     DeepEvalBaseEmbeddingModel,
 )
 from deepeval.models.llms import (
@@ -17,12 +16,6 @@ from deepeval.models.llms import (
     GrokModel,
     DeepSeekModel,
 )
-from deepeval.models.mlllms import (
-    MultimodalOpenAIModel,
-    MultimodalOllamaModel,
-    MultimodalGeminiModel,
-    MultimodalAzureOpenAIMLLMModel,
-)
 from deepeval.models.embedding_models import (
     OpenAIEmbeddingModel,
     AzureOpenAIEmbeddingModel,
@@ -33,7 +26,6 @@ from deepeval.models.embedding_models import (
 __all__ = [
     "DeepEvalBaseModel",
     "DeepEvalBaseLLM",
-    "DeepEvalBaseMLLM",
     "DeepEvalBaseEmbeddingModel",
     "GPTModel",
     "AzureOpenAIModel",
@@ -46,10 +38,6 @@ __all__ = [
     "KimiModel",
     "GrokModel",
     "DeepSeekModel",
-    "MultimodalOpenAIModel",
-    "MultimodalOllamaModel",
-    "MultimodalGeminiModel",
-    "MultimodalAzureOpenAIMLLMModel",
     "OpenAIEmbeddingModel",
     "AzureOpenAIEmbeddingModel",
     "LocalEmbeddingModel",