deepeval 3.7.4__py3-none-any.whl → 3.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155):
  1. deepeval/_version.py +1 -1
  2. deepeval/dataset/golden.py +54 -2
  3. deepeval/evaluate/evaluate.py +16 -8
  4. deepeval/evaluate/execute.py +70 -26
  5. deepeval/evaluate/utils.py +26 -22
  6. deepeval/integrations/pydantic_ai/agent.py +19 -2
  7. deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  8. deepeval/metrics/__init__.py +14 -12
  9. deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  10. deepeval/metrics/answer_relevancy/template.py +188 -92
  11. deepeval/metrics/base_metric.py +2 -5
  12. deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  13. deepeval/metrics/contextual_precision/template.py +115 -66
  14. deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  15. deepeval/metrics/contextual_recall/template.py +106 -55
  16. deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  17. deepeval/metrics/contextual_relevancy/template.py +87 -58
  18. deepeval/metrics/dag/templates.py +2 -2
  19. deepeval/metrics/faithfulness/faithfulness.py +70 -27
  20. deepeval/metrics/faithfulness/schema.py +1 -1
  21. deepeval/metrics/faithfulness/template.py +200 -115
  22. deepeval/metrics/g_eval/utils.py +2 -2
  23. deepeval/metrics/indicator.py +4 -4
  24. deepeval/metrics/multimodal_metrics/__init__.py +0 -18
  25. deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  26. deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  27. deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  28. deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  29. deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  30. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +63 -78
  31. deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  32. deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  33. deepeval/metrics/ragas.py +3 -3
  34. deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  35. deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  36. deepeval/metrics/turn_contextual_precision/template.py +187 -0
  37. deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  38. deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  39. deepeval/metrics/turn_contextual_recall/template.py +178 -0
  40. deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  41. deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_contextual_relevancy}/schema.py +7 -1
  42. deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  43. deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  44. deepeval/metrics/{multimodal_metrics/multimodal_faithfulness → turn_faithfulness}/schema.py +11 -3
  45. deepeval/metrics/turn_faithfulness/template.py +218 -0
  46. deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  47. deepeval/metrics/utils.py +39 -58
  48. deepeval/models/__init__.py +0 -12
  49. deepeval/models/base_model.py +16 -38
  50. deepeval/models/embedding_models/__init__.py +7 -0
  51. deepeval/models/embedding_models/azure_embedding_model.py +52 -28
  52. deepeval/models/embedding_models/local_embedding_model.py +18 -14
  53. deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
  54. deepeval/models/embedding_models/openai_embedding_model.py +40 -21
  55. deepeval/models/llms/amazon_bedrock_model.py +1 -2
  56. deepeval/models/llms/anthropic_model.py +44 -23
  57. deepeval/models/llms/azure_model.py +121 -36
  58. deepeval/models/llms/deepseek_model.py +18 -13
  59. deepeval/models/llms/gemini_model.py +129 -43
  60. deepeval/models/llms/grok_model.py +18 -13
  61. deepeval/models/llms/kimi_model.py +18 -13
  62. deepeval/models/llms/litellm_model.py +42 -22
  63. deepeval/models/llms/local_model.py +12 -7
  64. deepeval/models/llms/ollama_model.py +114 -12
  65. deepeval/models/llms/openai_model.py +137 -41
  66. deepeval/models/llms/portkey_model.py +24 -7
  67. deepeval/models/llms/utils.py +5 -3
  68. deepeval/models/retry_policy.py +17 -14
  69. deepeval/models/utils.py +46 -1
  70. deepeval/optimizer/__init__.py +5 -0
  71. deepeval/optimizer/algorithms/__init__.py +6 -0
  72. deepeval/optimizer/algorithms/base.py +29 -0
  73. deepeval/optimizer/algorithms/configs.py +18 -0
  74. deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  75. deepeval/{optimization/copro/loop.py → optimizer/algorithms/copro/copro.py} +112 -113
  76. deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  77. deepeval/{optimization/gepa/loop.py → optimizer/algorithms/gepa/gepa.py} +175 -115
  78. deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  79. deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  80. deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  81. deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  82. deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  83. deepeval/{optimization/simba/loop.py → optimizer/algorithms/simba/simba.py} +128 -112
  84. deepeval/{optimization → optimizer}/configs.py +5 -8
  85. deepeval/{optimization/policies/selection.py → optimizer/policies.py} +63 -2
  86. deepeval/optimizer/prompt_optimizer.py +263 -0
  87. deepeval/optimizer/rewriter/__init__.py +5 -0
  88. deepeval/optimizer/rewriter/rewriter.py +124 -0
  89. deepeval/optimizer/rewriter/utils.py +214 -0
  90. deepeval/optimizer/scorer/__init__.py +5 -0
  91. deepeval/optimizer/scorer/base.py +86 -0
  92. deepeval/optimizer/scorer/scorer.py +316 -0
  93. deepeval/optimizer/scorer/utils.py +30 -0
  94. deepeval/optimizer/types.py +148 -0
  95. deepeval/{optimization → optimizer}/utils.py +47 -165
  96. deepeval/prompt/prompt.py +5 -9
  97. deepeval/test_case/__init__.py +1 -3
  98. deepeval/test_case/api.py +12 -10
  99. deepeval/test_case/conversational_test_case.py +19 -1
  100. deepeval/test_case/llm_test_case.py +152 -1
  101. deepeval/test_case/utils.py +4 -8
  102. deepeval/test_run/api.py +15 -14
  103. deepeval/test_run/test_run.py +3 -3
  104. deepeval/tracing/patchers.py +9 -4
  105. deepeval/tracing/tracing.py +2 -2
  106. deepeval/utils.py +65 -0
  107. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/METADATA +1 -4
  108. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/RECORD +116 -125
  109. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  110. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  111. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  112. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  113. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  114. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  115. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  116. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  117. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  118. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  119. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  120. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  121. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  122. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  123. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  124. deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  125. deepeval/models/mlllms/__init__.py +0 -4
  126. deepeval/models/mlllms/azure_model.py +0 -343
  127. deepeval/models/mlllms/gemini_model.py +0 -313
  128. deepeval/models/mlllms/ollama_model.py +0 -175
  129. deepeval/models/mlllms/openai_model.py +0 -309
  130. deepeval/optimization/__init__.py +0 -13
  131. deepeval/optimization/adapters/__init__.py +0 -2
  132. deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  133. deepeval/optimization/aggregates.py +0 -14
  134. deepeval/optimization/copro/configs.py +0 -31
  135. deepeval/optimization/gepa/__init__.py +0 -7
  136. deepeval/optimization/gepa/configs.py +0 -115
  137. deepeval/optimization/miprov2/configs.py +0 -134
  138. deepeval/optimization/miprov2/loop.py +0 -785
  139. deepeval/optimization/mutations/__init__.py +0 -0
  140. deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  141. deepeval/optimization/policies/__init__.py +0 -16
  142. deepeval/optimization/policies/tie_breaker.py +0 -67
  143. deepeval/optimization/prompt_optimizer.py +0 -462
  144. deepeval/optimization/simba/__init__.py +0 -0
  145. deepeval/optimization/simba/configs.py +0 -33
  146. deepeval/optimization/types.py +0 -361
  147. deepeval/test_case/mllm_test_case.py +0 -170
  148. /deepeval/metrics/{multimodal_metrics/multimodal_answer_relevancy → turn_contextual_precision}/__init__.py +0 -0
  149. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_precision → turn_contextual_recall}/__init__.py +0 -0
  150. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_recall → turn_contextual_relevancy}/__init__.py +0 -0
  151. /deepeval/metrics/{multimodal_metrics/multimodal_contextual_relevancy → turn_faithfulness}/__init__.py +0 -0
  152. /deepeval/{optimization → optimizer/algorithms}/simba/types.py +0 -0
  153. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/LICENSE.md +0 -0
  154. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/WHEEL +0 -0
  155. {deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/entry_points.txt +0 -0
deepeval/test_run/api.py CHANGED
@@ -18,20 +18,21 @@ class LLMApiTestCase(BaseModel):
18
18
  token_cost: Optional[float] = Field(None, alias="tokenCost")
19
19
  completion_time: Optional[float] = Field(None, alias="completionTime")
20
20
  tags: Optional[List[str]] = Field(None)
21
- multimodal_input: Optional[List[Union[str, MLLMImage]]] = Field(
22
- None, alias="multimodalInput"
23
- )
24
- multimodal_input_actual_output: Optional[List[Union[str, MLLMImage]]] = (
25
- Field(None, alias="multimodalActualOutput")
26
- )
27
- multimodal_expected_output: Optional[List[Union[str, MLLMImage]]] = Field(
28
- None, alias="multimodalExpectedOutput"
29
- )
30
- multimodal_retrieval_context: Optional[List[Union[str, MLLMImage]]] = Field(
31
- None, alias="multimodalRetrievalContext"
32
- )
33
- multimodal_context: Optional[List[Union[str, MLLMImage]]] = Field(
34
- None, alias="multimodalContext"
21
+ # multimodal_input: Optional[str] = Field(None, alias="multimodalInput")
22
+ # multimodal_input_actual_output: Optional[str] = Field(
23
+ # None, alias="multimodalActualOutput"
24
+ # )
25
+ # multimodal_expected_output: Optional[str] = Field(
26
+ # None, alias="multimodalExpectedOutput"
27
+ # )
28
+ # multimodal_retrieval_context: Optional[List[str]] = Field(
29
+ # None, alias="multimodalRetrievalContext"
30
+ # )
31
+ # multimodal_context: Optional[List[str]] = Field(
32
+ # None, alias="multimodalContext"
33
+ # )
34
+ images_mapping: Optional[Dict[str, MLLMImage]] = Field(
35
+ None, alias="imagesMapping"
35
36
  )
36
37
 
37
38
  # make these optional, not all test cases in a conversation will be evaluated
deepeval/test_run/test_run.py CHANGED
@@ -21,7 +21,7 @@ from deepeval.test_run.api import (
21
21
  )
22
22
  from deepeval.tracing.utils import make_json_serializable
23
23
  from deepeval.tracing.api import SpanApiType, span_api_type_literals
24
- from deepeval.test_case import LLMTestCase, ConversationalTestCase, MLLMTestCase
24
+ from deepeval.test_case import LLMTestCase, ConversationalTestCase
25
25
  from deepeval.utils import (
26
26
  delete_file_if_exists,
27
27
  get_is_running_deepeval,
@@ -182,7 +182,7 @@ class TestRun(BaseModel):
182
182
 
183
183
  def set_dataset_properties(
184
184
  self,
185
- test_case: Union[LLMTestCase, ConversationalTestCase, MLLMTestCase],
185
+ test_case: Union[LLMTestCase, ConversationalTestCase],
186
186
  ):
187
187
  if self.dataset_alias is None:
188
188
  self.dataset_alias = test_case._dataset_alias
@@ -538,7 +538,7 @@ class TestRunManager:
538
538
  def update_test_run(
539
539
  self,
540
540
  api_test_case: Union[LLMApiTestCase, ConversationalApiTestCase],
541
- test_case: Union[LLMTestCase, ConversationalTestCase, MLLMTestCase],
541
+ test_case: Union[LLMTestCase, ConversationalTestCase],
542
542
  ):
543
543
  if (
544
544
  api_test_case.metrics_data is not None
deepeval/tracing/patchers.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import functools
2
2
 
3
- from anthropic import Anthropic
3
+ from typing import TYPE_CHECKING
4
+
4
5
  from openai import OpenAI
5
6
 
6
7
  from deepeval.tracing.context import update_current_span, update_llm_span
@@ -8,6 +9,10 @@ from deepeval.tracing.context import current_span_context
8
9
  from deepeval.tracing.types import LlmSpan
9
10
 
10
11
 
12
+ if TYPE_CHECKING:
13
+ from anthropic import Anthropic
14
+
15
+
11
16
  def patch_openai_client(client: OpenAI):
12
17
 
13
18
  original_methods = {}
@@ -61,7 +66,7 @@ def patch_openai_client(client: OpenAI):
61
66
  output = None
62
67
  try:
63
68
  output = response.choices[0].message.content
64
- except Exception as e:
69
+ except Exception:
65
70
  pass
66
71
 
67
72
  # extract input output token counts
@@ -70,7 +75,7 @@ def patch_openai_client(client: OpenAI):
70
75
  try:
71
76
  input_token_count = response.usage.prompt_tokens
72
77
  output_token_count = response.usage.completion_tokens
73
- except Exception as e:
78
+ except Exception:
74
79
  pass
75
80
 
76
81
  update_current_span(
@@ -86,7 +91,7 @@ def patch_openai_client(client: OpenAI):
86
91
  setattr(current_obj, method_name, wrapped_method)
87
92
 
88
93
 
89
- def patch_anthropic_client(client: Anthropic):
94
+ def patch_anthropic_client(client: "Anthropic"):
90
95
  """
91
96
  Patch an Anthropic client instance to add tracing capabilities.
92
97
 
deepeval/tracing/tracing.py CHANGED
@@ -19,7 +19,6 @@ import random
19
19
  import atexit
20
20
  import queue
21
21
  import uuid
22
- from anthropic import Anthropic
23
22
  from openai import OpenAI
24
23
  from rich.console import Console
25
24
  from rich.progress import Progress
@@ -74,6 +73,7 @@ from deepeval.tracing.trace_test_manager import trace_testing_manager
74
73
 
75
74
  if TYPE_CHECKING:
76
75
  from deepeval.dataset.golden import Golden
76
+ from anthropic import Anthropic
77
77
 
78
78
  EVAL_DUMMY_SPAN_NAME = "evals_iterator"
79
79
 
@@ -154,7 +154,7 @@ class TraceManager:
154
154
  environment: Optional[str] = None,
155
155
  sampling_rate: Optional[float] = None,
156
156
  confident_api_key: Optional[str] = None,
157
- anthropic_client: Optional[Anthropic] = None,
157
+ anthropic_client: Optional["Anthropic"] = None,
158
158
  openai_client: Optional[OpenAI] = None,
159
159
  tracing_enabled: Optional[bool] = None,
160
160
  ) -> None:
deepeval/utils.py CHANGED
@@ -14,6 +14,7 @@ import logging
14
14
 
15
15
  from contextvars import ContextVar
16
16
  from enum import Enum
17
+ from importlib import import_module
17
18
  from typing import Any, Dict, List, Optional, Protocol, Sequence, Union
18
19
  from collections.abc import Iterable
19
20
  from dataclasses import asdict, is_dataclass
@@ -537,6 +538,25 @@ def shorten(
537
538
  return stext[:cut] + suffix
538
539
 
539
540
 
541
+ def convert_to_multi_modal_array(input: Union[str, List[str]]):
542
+ from deepeval.test_case import MLLMImage
543
+
544
+ if isinstance(input, str):
545
+ return MLLMImage.parse_multimodal_string(input)
546
+ elif isinstance(input, list):
547
+ new_list = []
548
+ for context in input:
549
+ parsed_array = MLLMImage.parse_multimodal_string(context)
550
+ new_list.extend(parsed_array)
551
+ return new_list
552
+
553
+
554
+ def check_if_multimodal(input: str):
555
+ pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
556
+ matches = list(re.finditer(pattern, input))
557
+ return bool(matches)
558
+
559
+
540
560
  def format_turn(
541
561
  turn: TurnLike,
542
562
  *,
@@ -829,7 +849,22 @@ def require_param(
829
849
  env_var_name: str,
830
850
  param_hint: str,
831
851
  ) -> Any:
852
+ """
853
+ Ensures that a required parameter is provided. If the parameter is `None`, raises a
854
+ `DeepEvalError` with a helpful message indicating the missing parameter and how to resolve it.
832
855
 
856
+ Args:
857
+ param (Optional[Any]): The parameter to validate.
858
+ provider_label (str): A label for the provider to be used in the error message.
859
+ env_var_name (str): The name of the environment variable where the parameter can be set.
860
+ param_hint (str): A hint for the parameter, usually the name of the argument.
861
+
862
+ Raises:
863
+ DeepEvalError: If the `param` is `None`, indicating that a required parameter is missing.
864
+
865
+ Returns:
866
+ Any: The value of `param` if it is provided.
867
+ """
833
868
  if param is None:
834
869
  raise DeepEvalError(
835
870
  f"{provider_label} is missing a required parameter. "
@@ -838,3 +873,33 @@ def require_param(
838
873
  )
839
874
 
840
875
  return param
876
+
877
+
878
+ def require_dependency(
879
+ module_name: str,
880
+ *,
881
+ provider_label: str,
882
+ install_hint: Optional[str] = None,
883
+ ) -> Any:
884
+ """
885
+ Imports an optional dependency module or raises a `DeepEvalError` if the module is not found.
886
+ The error message includes a suggestion on how to install the missing module.
887
+
888
+ Args:
889
+ module_name (str): The name of the module to import.
890
+ provider_label (str): A label for the provider to be used in the error message.
891
+ install_hint (Optional[str]): A hint on how to install the missing module, usually a pip command.
892
+
893
+ Raises:
894
+ DeepEvalError: If the module cannot be imported, indicating that the dependency is missing.
895
+
896
+ Returns:
897
+ Any: The imported module if successful.
898
+ """
899
+ try:
900
+ return import_module(module_name)
901
+ except ImportError as exc:
902
+ hint = install_hint or f"Install it with `pip install {module_name}`."
903
+ raise DeepEvalError(
904
+ f"{provider_label} requires the `{module_name}` package. {hint}"
905
+ ) from exc
{deepeval-3.7.4.dist-info → deepeval-3.7.5.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.7.4
3
+ Version: 3.7.5
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -13,13 +13,10 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Requires-Dist: aiohttp
16
- Requires-Dist: anthropic
17
16
  Requires-Dist: click (>=8.0.0,<8.3.0)
18
- Requires-Dist: google-genai (>=1.9.0,<2.0.0)
19
17
  Requires-Dist: grpcio (>=1.67.1,<2.0.0)
20
18
  Requires-Dist: jinja2
21
19
  Requires-Dist: nest_asyncio
22
- Requires-Dist: ollama
23
20
  Requires-Dist: openai
24
21
  Requires-Dist: opentelemetry-api (>=1.24.0,<2.0.0)
25
22
  Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.24.0,<2.0.0)