deepeval 3.7.3__py3-none-any.whl → 3.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/cli/test.py +1 -1
- deepeval/config/settings.py +102 -13
- deepeval/dataset/golden.py +54 -2
- deepeval/evaluate/configs.py +1 -1
- deepeval/evaluate/evaluate.py +16 -8
- deepeval/evaluate/execute.py +74 -27
- deepeval/evaluate/utils.py +26 -22
- deepeval/integrations/pydantic_ai/agent.py +19 -2
- deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
- deepeval/metrics/__init__.py +14 -12
- deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
- deepeval/metrics/answer_relevancy/template.py +188 -92
- deepeval/metrics/argument_correctness/template.py +2 -2
- deepeval/metrics/base_metric.py +2 -5
- deepeval/metrics/bias/template.py +3 -3
- deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
- deepeval/metrics/contextual_precision/template.py +115 -66
- deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
- deepeval/metrics/contextual_recall/template.py +106 -55
- deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
- deepeval/metrics/contextual_relevancy/template.py +87 -58
- deepeval/metrics/conversation_completeness/template.py +2 -2
- deepeval/metrics/conversational_dag/templates.py +4 -4
- deepeval/metrics/conversational_g_eval/template.py +4 -3
- deepeval/metrics/dag/templates.py +5 -5
- deepeval/metrics/faithfulness/faithfulness.py +70 -27
- deepeval/metrics/faithfulness/schema.py +1 -1
- deepeval/metrics/faithfulness/template.py +200 -115
- deepeval/metrics/g_eval/utils.py +2 -2
- deepeval/metrics/hallucination/template.py +4 -4
- deepeval/metrics/indicator.py +4 -4
- deepeval/metrics/misuse/template.py +2 -2
- deepeval/metrics/multimodal_metrics/__init__.py +0 -18
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
- deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
- deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
- deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
- deepeval/metrics/non_advice/template.py +2 -2
- deepeval/metrics/pii_leakage/template.py +2 -2
- deepeval/metrics/prompt_alignment/template.py +4 -4
- deepeval/metrics/ragas.py +3 -3
- deepeval/metrics/role_violation/template.py +2 -2
- deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
- deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
- deepeval/metrics/toxicity/template.py +4 -4
- deepeval/metrics/turn_contextual_precision/schema.py +21 -0
- deepeval/metrics/turn_contextual_precision/template.py +187 -0
- deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
- deepeval/metrics/turn_contextual_recall/schema.py +21 -0
- deepeval/metrics/turn_contextual_recall/template.py +178 -0
- deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
- deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
- deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
- deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
- deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
- deepeval/metrics/turn_faithfulness/template.py +218 -0
- deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
- deepeval/metrics/turn_relevancy/template.py +2 -2
- deepeval/metrics/utils.py +39 -58
- deepeval/models/__init__.py +0 -12
- deepeval/models/base_model.py +16 -38
- deepeval/models/embedding_models/__init__.py +7 -0
- deepeval/models/embedding_models/azure_embedding_model.py +69 -32
- deepeval/models/embedding_models/local_embedding_model.py +39 -22
- deepeval/models/embedding_models/ollama_embedding_model.py +42 -18
- deepeval/models/embedding_models/openai_embedding_model.py +50 -15
- deepeval/models/llms/amazon_bedrock_model.py +1 -2
- deepeval/models/llms/anthropic_model.py +53 -20
- deepeval/models/llms/azure_model.py +140 -43
- deepeval/models/llms/deepseek_model.py +38 -23
- deepeval/models/llms/gemini_model.py +222 -103
- deepeval/models/llms/grok_model.py +39 -27
- deepeval/models/llms/kimi_model.py +39 -23
- deepeval/models/llms/litellm_model.py +103 -45
- deepeval/models/llms/local_model.py +35 -22
- deepeval/models/llms/ollama_model.py +129 -17
- deepeval/models/llms/openai_model.py +151 -50
- deepeval/models/llms/portkey_model.py +149 -0
- deepeval/models/llms/utils.py +5 -3
- deepeval/models/retry_policy.py +17 -14
- deepeval/models/utils.py +94 -4
- deepeval/optimizer/__init__.py +5 -0
- deepeval/optimizer/algorithms/__init__.py +6 -0
- deepeval/optimizer/algorithms/base.py +29 -0
- deepeval/optimizer/algorithms/configs.py +18 -0
- deepeval/optimizer/algorithms/copro/__init__.py +5 -0
- deepeval/optimizer/algorithms/copro/copro.py +836 -0
- deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
- deepeval/optimizer/algorithms/gepa/gepa.py +737 -0
- deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
- deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
- deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
- deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
- deepeval/optimizer/algorithms/simba/__init__.py +5 -0
- deepeval/optimizer/algorithms/simba/simba.py +999 -0
- deepeval/optimizer/algorithms/simba/types.py +15 -0
- deepeval/optimizer/configs.py +31 -0
- deepeval/optimizer/policies.py +227 -0
- deepeval/optimizer/prompt_optimizer.py +263 -0
- deepeval/optimizer/rewriter/__init__.py +5 -0
- deepeval/optimizer/rewriter/rewriter.py +124 -0
- deepeval/optimizer/rewriter/utils.py +214 -0
- deepeval/optimizer/scorer/__init__.py +5 -0
- deepeval/optimizer/scorer/base.py +86 -0
- deepeval/optimizer/scorer/scorer.py +316 -0
- deepeval/optimizer/scorer/utils.py +30 -0
- deepeval/optimizer/types.py +148 -0
- deepeval/optimizer/utils.py +480 -0
- deepeval/prompt/prompt.py +7 -6
- deepeval/test_case/__init__.py +1 -3
- deepeval/test_case/api.py +12 -10
- deepeval/test_case/conversational_test_case.py +19 -1
- deepeval/test_case/llm_test_case.py +152 -1
- deepeval/test_case/utils.py +4 -8
- deepeval/test_run/api.py +15 -14
- deepeval/test_run/cache.py +2 -0
- deepeval/test_run/test_run.py +9 -4
- deepeval/tracing/patchers.py +9 -4
- deepeval/tracing/tracing.py +2 -2
- deepeval/utils.py +89 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/RECORD +134 -118
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
- deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
- deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
- deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
- deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
- deepeval/models/mlllms/__init__.py +0 -4
- deepeval/models/mlllms/azure_model.py +0 -334
- deepeval/models/mlllms/gemini_model.py +0 -284
- deepeval/models/mlllms/ollama_model.py +0 -144
- deepeval/models/mlllms/openai_model.py +0 -258
- deepeval/test_case/mllm_test_case.py +0 -170
- /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
- /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
- {deepeval-3.7.3.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
deepeval/metrics/utils.py
CHANGED
```diff
@@ -2,16 +2,14 @@ import inspect
 import json
 import re
 import sys
-import itertools
 from typing import Any, Dict, Optional, List, Union, Tuple
 
 from deepeval.errors import (
     MissingTestCaseParamsError,
-    MismatchedTestCaseInputsError,
 )
+from deepeval.utils import convert_to_multi_modal_array
 from deepeval.models import (
     DeepEvalBaseLLM,
-    DeepEvalBaseMLLM,
     GPTModel,
     AnthropicModel,
     AzureOpenAIModel,
@@ -22,10 +20,6 @@ from deepeval.models import (
     OllamaEmbeddingModel,
     LocalEmbeddingModel,
     GeminiModel,
-    MultimodalOpenAIModel,
-    MultimodalGeminiModel,
-    MultimodalOllamaModel,
-    MultimodalAzureOpenAIMLLMModel,
     AmazonBedrockModel,
     LiteLLMModel,
     KimiModel,
@@ -45,11 +39,8 @@ from deepeval.metrics import (
 )
 from deepeval.models.base_model import DeepEvalBaseEmbeddingModel
 from deepeval.test_case import (
-    Turn,
     LLMTestCase,
     LLMTestCaseParams,
-    MLLMTestCase,
-    MLLMTestCaseParams,
     ConversationalTestCase,
     MLLMImage,
     Turn,
@@ -58,6 +49,13 @@ from deepeval.test_case import (
     TurnParams,
 )
 
+MULTIMODAL_SUPPORTED_MODELS = [
+    GPTModel,
+    GeminiModel,
+    OllamaModel,
+    AzureOpenAIModel,
+]
+
 
 def copy_metrics(
     metrics: List[
@@ -201,7 +199,20 @@ def check_conversational_test_case_params(
     test_case_params: List[TurnParams],
     metric: BaseConversationalMetric,
     require_chatbot_role: bool = False,
+    model: Optional[DeepEvalBaseLLM] = None,
+    multimodal: Optional[bool] = False,
 ):
+    if multimodal:
+        if not model or not model.supports_multimodal():
+            if model and type(model) in MULTIMODAL_SUPPORTED_MODELS:
+                raise ValueError(
+                    f"The evaluation model {model.name} does not support multimodal evaluations at the moment. Available multi-modal models for the {model.__class__.__name__} provider includes {', '.join(model.__class__.valid_multimodal_models)}."
+                )
+            else:
+                raise ValueError(
+                    f"The evaluation model {model.name} does not support multimodal inputs, please use one of the following evaluation models: {', '.join([cls.__name__ for cls in MULTIMODAL_SUPPORTED_MODELS])}"
+                )
+
     if isinstance(test_case, ConversationalTestCase) is False:
         error_str = f"Unable to evaluate test cases that are not of type 'ConversationalTestCase' using the conversational '{metric.__name__}' metric."
         metric.error = error_str
@@ -289,15 +300,26 @@ def check_arena_test_case_params(
 
 
 def check_mllm_test_case_params(
-    test_case: MLLMTestCase,
-    test_case_params: List[MLLMTestCaseParams],
+    test_case: LLMTestCase,
+    test_case_params: List[LLMTestCaseParams],
     input_image_count: Optional[int],
     actual_output_image_count: Optional[int],
     metric: BaseMetric,
+    model: Optional[DeepEvalBaseLLM] = None,
 ):
+    if not model or not model.supports_multimodal():
+        if model and type(model) in MULTIMODAL_SUPPORTED_MODELS:
+            raise ValueError(
+                f"The evaluation model {model.name} does not support multimodal evaluations at the moment. Available multi-modal models for the {model.__class__.__name__} provider includes {', '.join(model.__class__.valid_multimodal_models)}."
+            )
+        else:
+            raise ValueError(
+                f"The evaluation model {model.name} does not support multimodal inputs, please use one of the following evaluation models: {', '.join([cls.__name__ for cls in MULTIMODAL_SUPPORTED_MODELS])}"
+            )
+
     if input_image_count:
         count = 0
-        for ele in test_case.input:
+        for ele in convert_to_multi_modal_array(test_case.input):
             if isinstance(ele, MLLMImage):
                 count += 1
         if count != input_image_count:
@@ -306,18 +328,13 @@ def check_mllm_test_case_params(
 
     if actual_output_image_count:
         count = 0
-        for ele in test_case.actual_output:
+        for ele in convert_to_multi_modal_array(test_case.actual_output):
             if isinstance(ele, MLLMImage):
                 count += 1
         if count != actual_output_image_count:
             error_str = f"Unable to evaluate test cases with '{actual_output_image_count}' output images using the '{metric.__name__}' metric. `{count}` found."
             raise ValueError(error_str)
 
-    if isinstance(test_case, MLLMTestCase) is False:
-        error_str = f"Unable to evaluate test cases that are not of type 'MLLMTestCase' using the '{metric.__name__}' metric."
-        metric.error = error_str
-        raise ValueError(error_str)
-
     missing_params = []
     for param in test_case_params:
         if getattr(test_case, param.value) is None:
@@ -339,8 +356,8 @@ def check_mllm_test_case_params(
 
 
 def check_mllm_test_cases_params(
-    test_cases: List[MLLMTestCase],
-    test_case_params: List[MLLMTestCaseParams],
+    test_cases: List[LLMTestCase],
+    test_case_params: List[LLMTestCaseParams],
     input_image_count: Optional[int],
     actual_output_image_count: Optional[int],
     metric: BaseMetric,
@@ -459,7 +476,7 @@ def initialize_model(
     elif should_use_local_model():
         return LocalModel(), True
     elif should_use_azure_openai():
-        return AzureOpenAIModel(
+        return AzureOpenAIModel(model=model), True
     elif should_use_moonshot_model():
         return KimiModel(model=model), True
     elif should_use_grok_model():
@@ -501,42 +518,6 @@ def is_native_model(
 ###############################################
 
 
-def initialize_multimodal_model(
-    model: Optional[Union[str, DeepEvalBaseMLLM]] = None,
-) -> Tuple[DeepEvalBaseLLM, bool]:
-    """
-    Returns a tuple of (initialized DeepEvalBaseMLLM, using_native_model boolean)
-    """
-    if is_native_mllm(model):
-        return model, True
-    if isinstance(model, DeepEvalBaseMLLM):
-        return model, False
-    if should_use_gemini_model():
-        return MultimodalGeminiModel(), True
-    if should_use_ollama_model():
-        return MultimodalOllamaModel(), True
-    elif should_use_azure_openai():
-        return MultimodalAzureOpenAIMLLMModel(model_name=model), True
-    elif isinstance(model, str) or model is None:
-        return MultimodalOpenAIModel(model=model), True
-    raise TypeError(
-        f"Unsupported type for model: {type(model)}. Expected None, str, DeepEvalBaseMLLM, MultimodalOpenAIModel, MultimodalOllamaModel."
-    )
-
-
-def is_native_mllm(
-    model: Optional[Union[str, DeepEvalBaseLLM]] = None,
-) -> bool:
-    if (
-        isinstance(model, MultimodalOpenAIModel)
-        or isinstance(model, MultimodalOllamaModel)
-        or isinstance(model, MultimodalGeminiModel)
-    ):
-        return True
-    else:
-        return False
-
-
 ###############################################
 # Embedding Model
 ###############################################
```
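Note: the gate added to `check_conversational_test_case_params` and `check_mllm_test_case_params` is the behavioral core of this file's change: multimodal evaluation is now keyed off the evaluation model's `supports_multimodal()` capability instead of the removed `MLLMTestCase`/`initialize_multimodal_model` path. A minimal sketch of how that gate behaves, using a hypothetical stand-in model (the real call sites live inside deepeval's metrics):

```python
from typing import List, Optional


class TextOnlyModel:
    # stand-in for a DeepEvalBaseLLM subclass; `name` and
    # `supports_multimodal()` are the only attributes the gate reads
    name = "my-text-only-model"

    def supports_multimodal(self) -> bool:
        return False


MULTIMODAL_SUPPORTED_MODELS: List[type] = []  # providers with multimodal variants


def multimodal_gate(model: Optional[TextOnlyModel], multimodal: bool) -> None:
    # mirrors the check added above to check_conversational_test_case_params
    if multimodal:
        if not model or not model.supports_multimodal():
            raise ValueError(
                f"The evaluation model {model.name} does not support multimodal inputs"
            )


multimodal_gate(TextOnlyModel(), multimodal=False)  # no-op: multimodal not requested
try:
    multimodal_gate(TextOnlyModel(), multimodal=True)  # raises: model is text-only
except ValueError as err:
    print(err)
```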
deepeval/models/__init__.py
CHANGED
```diff
@@ -1,7 +1,6 @@
 from deepeval.models.base_model import (
     DeepEvalBaseModel,
     DeepEvalBaseLLM,
-    DeepEvalBaseMLLM,
     DeepEvalBaseEmbeddingModel,
 )
 from deepeval.models.llms import (
@@ -17,12 +16,6 @@ from deepeval.models.llms import (
     GrokModel,
     DeepSeekModel,
 )
-from deepeval.models.mlllms import (
-    MultimodalOpenAIModel,
-    MultimodalOllamaModel,
-    MultimodalGeminiModel,
-    MultimodalAzureOpenAIMLLMModel,
-)
 from deepeval.models.embedding_models import (
     OpenAIEmbeddingModel,
     AzureOpenAIEmbeddingModel,
@@ -33,7 +26,6 @@ from deepeval.models.embedding_models import (
 __all__ = [
     "DeepEvalBaseModel",
     "DeepEvalBaseLLM",
-    "DeepEvalBaseMLLM",
     "DeepEvalBaseEmbeddingModel",
     "GPTModel",
     "AzureOpenAIModel",
@@ -46,10 +38,6 @@ __all__ = [
     "KimiModel",
     "GrokModel",
     "DeepSeekModel",
-    "MultimodalOpenAIModel",
-    "MultimodalOllamaModel",
-    "MultimodalGeminiModel",
-    "MultimodalAzureOpenAIMLLMModel",
     "OpenAIEmbeddingModel",
     "AzureOpenAIEmbeddingModel",
     "LocalEmbeddingModel",
```
deepeval/models/base_model.py
CHANGED
```diff
@@ -31,9 +31,9 @@ class DeepEvalBaseModel(ABC):
 
 
 class DeepEvalBaseLLM(ABC):
-    def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
-        self.model_name = parse_model_name(model_name)
-        self.model = self.load_model(*args, **kwargs)
+    def __init__(self, model: Optional[str] = None, *args, **kwargs):
+        self.name = parse_model_name(model)
+        self.model = self.load_model()
 
     @abstractmethod
     def load_model(self, *args, **kwargs) -> "DeepEvalBaseLLM":
@@ -62,6 +62,13 @@ class DeepEvalBaseLLM(ABC):
         """
         pass
 
+    @abstractmethod
+    def get_model_name(self, *args, **kwargs) -> str:
+        return self.name
+
+    def supports_multimodal(self) -> bool:
+        return False
+
     def batch_generate(self, *args, **kwargs) -> List[str]:
         """Runs the model to output LLM responses.
 
@@ -72,43 +79,14 @@ class DeepEvalBaseLLM(ABC):
             "batch_generate is not implemented for this model"
         )
 
-
-
-    pass
-
-
-class DeepEvalBaseMLLM(ABC):
-    def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
-        self.model_name = parse_model_name(model_name)
-
-    @abstractmethod
-    def generate(self, *args, **kwargs) -> str:
-        """Runs the model to output MLLM response.
-
-        Returns:
-            A string.
-        """
-        pass
-
-    @abstractmethod
-    async def a_generate(self, *args, **kwargs) -> str:
-        """Runs the model to output MLLM response.
-
-        Returns:
-            A string.
-        """
-        pass
-
-    @abstractmethod
-    def get_model_name(self, *args, **kwargs) -> str:
-        pass
+    def supports_multimodal(self):
+        return False
 
 
 class DeepEvalBaseEmbeddingModel(ABC):
-    def __init__(self, model_name: Optional[str] = None, *args, **kwargs):
-        self.model_name = parse_model_name(model_name)
-
-        self.model = self.load_model(*args, **kwargs)
+    def __init__(self, model: Optional[str] = None, *args, **kwargs):
+        self.name = parse_model_name(model)
+        self.model = self.load_model()
 
     @abstractmethod
     def load_model(self, *args, **kwargs) -> "DeepEvalBaseEmbeddingModel":
@@ -157,4 +135,4 @@ class DeepEvalBaseEmbeddingModel(ABC):
 
     @abstractmethod
     def get_model_name(self, *args, **kwargs) -> str:
-        pass
+        return self.name
```
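Note: the base-class rename (`model_name`/`self.model_name` → `model`/`self.name`, with `load_model()` now called without arguments) touches every custom model subclass. A minimal sketch of a subclass written against the new contract; `EchoModel` is hypothetical:

```python
from deepeval.models import DeepEvalBaseLLM


class EchoModel(DeepEvalBaseLLM):
    """Toy subclass illustrating the renamed base-class attributes."""

    def load_model(self):
        return None  # no underlying client needed for this sketch

    def generate(self, prompt: str) -> str:
        return prompt

    async def a_generate(self, prompt: str) -> str:
        return prompt

    def get_model_name(self) -> str:
        return self.name  # was `self.model_name` in 3.7.3


model = EchoModel(model="echo-1")
print(model.get_model_name())       # echo-1
print(model.supports_multimodal())  # False unless a subclass overrides it
```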
deepeval/models/embedding_models/__init__.py
CHANGED

```diff
@@ -2,3 +2,10 @@ from .azure_embedding_model import AzureOpenAIEmbeddingModel
 from .openai_embedding_model import OpenAIEmbeddingModel
 from .local_embedding_model import LocalEmbeddingModel
 from .ollama_embedding_model import OllamaEmbeddingModel
+
+__all__ = [
+    "AzureOpenAIEmbeddingModel",
+    "OpenAIEmbeddingModel",
+    "LocalEmbeddingModel",
+    "OllamaEmbeddingModel",
+]
```
deepeval/models/embedding_models/azure_embedding_model.py
CHANGED

```diff
@@ -1,55 +1,85 @@
 from typing import Dict, List, Optional
 from openai import AzureOpenAI, AsyncAzureOpenAI
-from deepeval.key_handler import (
-    EmbeddingKeyValues,
-    ModelKeyValues,
-    KEY_FILE_HANDLER,
-)
+from pydantic import SecretStr
+
+from deepeval.config.settings import get_settings
 from deepeval.models import DeepEvalBaseEmbeddingModel
 from deepeval.models.retry_policy import (
     create_retry_decorator,
     sdk_retries_for,
 )
 from deepeval.constants import ProviderSlug as PS
+from deepeval.models.utils import (
+    require_secret_api_key,
+    normalize_kwargs_and_extract_aliases,
+)
 
 
 retry_azure = create_retry_decorator(PS.AZURE)
 
+_ALIAS_MAP = {
+    "api_key": ["openai_api_key"],
+    "base_url": ["azure_endpoint"],
+    "deployment_name": ["azure_deployment"],
+}
+
 
 class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def __init__(
         self,
-        openai_api_key: Optional[str] = None,
-        openai_api_version: Optional[str] = None,
-        azure_endpoint: Optional[str] = None,
-        azure_deployment: Optional[str] = None,
         model: Optional[str] = None,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        deployment_name: Optional[str] = None,
+        openai_api_version: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
-        **
+        **kwargs,
     ):
-
-
+        normalized_kwargs, alias_values = normalize_kwargs_and_extract_aliases(
+            "AzureOpenAIEmbeddingModel",
+            kwargs,
+            _ALIAS_MAP,
         )
+
+        # re-map depricated keywords to re-named positional args
+        if api_key is None and "api_key" in alias_values:
+            api_key = alias_values["api_key"]
+        if base_url is None and "base_url" in alias_values:
+            base_url = alias_values["base_url"]
+        if deployment_name is None and "deployment_name" in alias_values:
+            deployment_name = alias_values["deployment_name"]
+
+        settings = get_settings()
+
+        if api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = settings.AZURE_OPENAI_API_KEY
+
         self.openai_api_version = (
-            openai_api_version
-            or KEY_FILE_HANDLER.fetch_data(ModelKeyValues.OPENAI_API_VERSION)
+            openai_api_version or settings.OPENAI_API_VERSION
         )
-        self.
-
+        self.base_url = (
+            base_url
+            or settings.AZURE_OPENAI_ENDPOINT
+            and str(settings.AZURE_OPENAI_ENDPOINT)
         )
-
-
+
+        self.deployment_name = (
+            deployment_name or settings.AZURE_EMBEDDING_DEPLOYMENT_NAME
         )
-
-        self.
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = normalized_kwargs
+        model = model or self.deployment_name
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(
+        super().__init__(model)
 
     @retry_azure
     def embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=text, model=self.
+            input=text, model=self.name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -57,7 +87,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=False)
         response = client.embeddings.create(
-            input=texts, model=self.
+            input=texts, model=self.name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
@@ -65,7 +95,7 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=text, model=self.
+            input=text, model=self.name, **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -73,28 +103,32 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         client = self.load_model(async_mode=True)
         response = await client.embeddings.create(
-            input=texts, model=self.
+            input=texts, model=self.name, **self.generation_kwargs
         )
         return [item.embedding for item in response.data]
 
-    def get_model_name(self) -> str:
-        return self.model_name
-
     def load_model(self, async_mode: bool = False):
         if not async_mode:
             return self._build_client(AzureOpenAI)
         return self._build_client(AsyncAzureOpenAI)
 
     def _build_client(self, cls):
-
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="AzureOpenAI",
+            env_var_name="AZURE_OPENAI_API_KEY",
+            param_hint="`api_key` to AzureOpenAIEmbeddingModel(...)",
+        )
+
+        client_kwargs = self.kwargs.copy()
         if not sdk_retries_for(PS.AZURE):
             client_kwargs["max_retries"] = 0
 
         client_init_kwargs = dict(
-            api_key=
+            api_key=api_key,
             api_version=self.openai_api_version,
-            azure_endpoint=self.
-            azure_deployment=self.
+            azure_endpoint=self.base_url,
+            azure_deployment=self.deployment_name,
             **client_kwargs,
         )
         try:
@@ -105,3 +139,6 @@ class AzureOpenAIEmbeddingModel(DeepEvalBaseEmbeddingModel):
             client_init_kwargs.pop("max_retries", None)
             return cls(**client_init_kwargs)
         raise
+
+    def get_model_name(self):
+        return f"{self.name} (Azure)"
```
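Note: the constructor now prefers `api_key`, `base_url`, and `deployment_name`; judging by `_ALIAS_MAP` and the re-mapping block above, the 3.7.3 keyword names are still accepted as deprecated aliases. A sketch of both spellings, with placeholder values:

```python
from deepeval.models import AzureOpenAIEmbeddingModel

# 3.7.5-style keywords
embedder = AzureOpenAIEmbeddingModel(
    model="text-embedding-3-small",
    api_key="<azure-api-key>",
    base_url="https://my-resource.openai.azure.com/",
    deployment_name="embeddings",
    openai_api_version="2024-02-01",
)

# 3.7.3-style keywords, re-mapped through _ALIAS_MAP
legacy = AzureOpenAIEmbeddingModel(
    model="text-embedding-3-small",
    openai_api_key="<azure-api-key>",
    azure_endpoint="https://my-resource.openai.azure.com/",
    azure_deployment="embeddings",
    openai_api_version="2024-02-01",
)

print(embedder.get_model_name())  # text-embedding-3-small (Azure)
```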
deepeval/models/embedding_models/local_embedding_model.py
CHANGED

```diff
@@ -1,7 +1,11 @@
 from openai import OpenAI, AsyncOpenAI
 from typing import Dict, List, Optional
+from pydantic import SecretStr
 
-from deepeval.
+from deepeval.config.settings import get_settings
+from deepeval.models.utils import (
+    require_secret_api_key,
+)
 from deepeval.models import DeepEvalBaseEmbeddingModel
 from deepeval.models.retry_policy import (
     create_retry_decorator,
@@ -17,30 +21,36 @@ retry_local = create_retry_decorator(PS.LOCAL)
 class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def __init__(
         self,
+        model: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
-        model: Optional[str] = None,
         generation_kwargs: Optional[Dict] = None,
-        **
+        **kwargs,
     ):
-
-
-
-
-
-
-
-
+
+        settings = get_settings()
+        if api_key is not None:
+            # keep it secret, keep it safe from serializings, logging and alike
+            self.api_key: SecretStr | None = SecretStr(api_key)
+        else:
+            self.api_key = get_settings().LOCAL_EMBEDDING_API_KEY
+
+        self.base_url = (
+            base_url
+            or settings.LOCAL_EMBEDDING_BASE_URL
+            and str(settings.LOCAL_EMBEDDING_BASE_URL)
         )
-
+        model = model or settings.LOCAL_EMBEDDING_MODEL_NAME
+        # Keep sanitized kwargs for client call to strip legacy keys
+        self.kwargs = kwargs
         self.generation_kwargs = generation_kwargs or {}
-        super().__init__(
+        super().__init__(model)
 
     @retry_local
     def embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.
+            model=self.name, input=[text], **self.generation_kwargs
         )
         return response.data[0].embedding
 
@@ -48,7 +58,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     def embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model()
         response = embedding_model.embeddings.create(
-            model=self.
+            model=self.name, input=texts, **self.generation_kwargs
         )
         return [data.embedding for data in response.data]
 
@@ -56,7 +66,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_text(self, text: str) -> List[float]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.
+            model=self.name, input=[text], **self.generation_kwargs
        )
         return response.data[0].embedding
 
@@ -64,7 +74,7 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
         embedding_model = self.load_model(async_mode=True)
         response = await embedding_model.embeddings.create(
-            model=self.
+            model=self.name, input=texts, **self.generation_kwargs
         )
         return [data.embedding for data in response.data]
 
@@ -72,21 +82,25 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
     # Model
     ###############################################
 
-    def get_model_name(self):
-        return self.model_name
-
     def load_model(self, async_mode: bool = False):
         if not async_mode:
             return self._build_client(OpenAI)
         return self._build_client(AsyncOpenAI)
 
     def _build_client(self, cls):
-
+        api_key = require_secret_api_key(
+            self.api_key,
+            provider_label="OpenAI",
+            env_var_name="LOCAL_EMBEDDING_API_KEY",
+            param_hint="`api_key` to LocalEmbeddingModel(...)",
+        )
+
+        client_kwargs = self.kwargs.copy()
         if not sdk_retries_for(PS.LOCAL):
             client_kwargs["max_retries"] = 0
 
         client_init_kwargs = dict(
-            api_key=
+            api_key=api_key,
             base_url=self.base_url,
             **client_kwargs,
         )
@@ -98,3 +112,6 @@ class LocalEmbeddingModel(DeepEvalBaseEmbeddingModel):
             client_init_kwargs.pop("max_retries", None)
             return cls(**client_init_kwargs)
         raise
+
+    def get_model_name(self):
+        return f"{self.name} (Local Model)"
```
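Note: both embedding models now hold the API key as a pydantic `SecretStr`, so it is masked in `repr()` and logs, and `require_secret_api_key` unwraps it only when the OpenAI client is built. The underlying pattern:

```python
from pydantic import SecretStr

api_key = SecretStr("sk-local-placeholder")
print(api_key)                     # **********  (safe to log or serialize)
print(api_key.get_secret_value())  # raw key, unwrapped only at client build time
```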