deepeval 3.7.4__tar.gz → 3.7.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (577)
  1. {deepeval-3.7.4 → deepeval-3.7.5}/PKG-INFO +1 -4
  2. deepeval-3.7.5/deepeval/_version.py +1 -0
  3. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/golden.py +54 -2
  4. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/evaluate.py +16 -8
  5. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/execute.py +70 -26
  6. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/utils.py +26 -22
  7. deepeval-3.7.5/deepeval/integrations/pydantic_ai/agent.py +38 -0
  8. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  9. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/__init__.py +14 -12
  10. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/answer_relevancy/answer_relevancy.py +74 -29
  11. deepeval-3.7.5/deepeval/metrics/answer_relevancy/template.py +206 -0
  12. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/base_metric.py +2 -5
  13. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_precision/contextual_precision.py +53 -15
  14. deepeval-3.7.5/deepeval/metrics/contextual_precision/template.py +133 -0
  15. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_recall/contextual_recall.py +50 -13
  16. deepeval-3.7.5/deepeval/metrics/contextual_recall/template.py +126 -0
  17. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +47 -15
  18. deepeval-3.7.5/deepeval/metrics/contextual_relevancy/template.py +106 -0
  19. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/templates.py +2 -2
  20. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/faithfulness/faithfulness.py +70 -27
  21. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/faithfulness/schema.py +1 -1
  22. deepeval-3.7.5/deepeval/metrics/faithfulness/template.py +225 -0
  23. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/g_eval/utils.py +2 -2
  24. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/indicator.py +4 -4
  25. deepeval-3.7.5/deepeval/metrics/multimodal_metrics/__init__.py +6 -0
  26. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +24 -17
  27. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +26 -21
  28. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +24 -17
  29. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +24 -17
  30. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +19 -19
  31. deepeval-3.7.5/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +133 -0
  32. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +20 -20
  33. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +71 -50
  34. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/ragas.py +3 -3
  35. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_correctness/tool_correctness.py +2 -2
  36. deepeval-3.7.5/deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  37. deepeval-3.7.5/deepeval/metrics/turn_contextual_precision/template.py +187 -0
  38. deepeval-3.7.5/deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +550 -0
  39. deepeval-3.7.5/deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  40. deepeval-3.7.5/deepeval/metrics/turn_contextual_recall/template.py +178 -0
  41. deepeval-3.7.5/deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +520 -0
  42. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy → deepeval-3.7.5/deepeval/metrics/turn_contextual_relevancy}/schema.py +7 -1
  43. deepeval-3.7.5/deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  44. deepeval-3.7.5/deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +535 -0
  45. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness → deepeval-3.7.5/deepeval/metrics/turn_faithfulness}/schema.py +11 -3
  46. deepeval-3.7.5/deepeval/metrics/turn_faithfulness/template.py +218 -0
  47. deepeval-3.7.5/deepeval/metrics/turn_faithfulness/turn_faithfulness.py +596 -0
  48. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/utils.py +39 -58
  49. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/__init__.py +0 -12
  50. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/base_model.py +16 -38
  51. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/embedding_models/__init__.py +7 -0
  52. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/embedding_models/azure_embedding_model.py +52 -28
  53. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/embedding_models/local_embedding_model.py +18 -14
  54. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/embedding_models/ollama_embedding_model.py +38 -16
  55. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/embedding_models/openai_embedding_model.py +40 -21
  56. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/amazon_bedrock_model.py +1 -2
  57. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/anthropic_model.py +44 -23
  58. {deepeval-3.7.4/deepeval/models/mlllms → deepeval-3.7.5/deepeval/models/llms}/azure_model.py +111 -70
  59. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/deepseek_model.py +18 -13
  60. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/gemini_model.py +129 -43
  61. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/grok_model.py +18 -13
  62. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/kimi_model.py +18 -13
  63. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/litellm_model.py +42 -22
  64. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/local_model.py +12 -7
  65. {deepeval-3.7.4/deepeval/models/mlllms → deepeval-3.7.5/deepeval/models/llms}/ollama_model.py +85 -44
  66. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/openai_model.py +137 -41
  67. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/portkey_model.py +24 -7
  68. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/utils.py +5 -3
  69. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/retry_policy.py +17 -14
  70. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/utils.py +46 -1
  71. deepeval-3.7.5/deepeval/optimizer/__init__.py +5 -0
  72. deepeval-3.7.5/deepeval/optimizer/algorithms/__init__.py +6 -0
  73. deepeval-3.7.5/deepeval/optimizer/algorithms/base.py +29 -0
  74. deepeval-3.7.5/deepeval/optimizer/algorithms/configs.py +18 -0
  75. deepeval-3.7.5/deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  76. deepeval-3.7.4/deepeval/optimization/copro/loop.py → deepeval-3.7.5/deepeval/optimizer/algorithms/copro/copro.py +112 -113
  77. deepeval-3.7.5/deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  78. deepeval-3.7.4/deepeval/optimization/gepa/loop.py → deepeval-3.7.5/deepeval/optimizer/algorithms/gepa/gepa.py +175 -115
  79. deepeval-3.7.5/deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  80. deepeval-3.7.5/deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  81. deepeval-3.7.5/deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  82. deepeval-3.7.5/deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  83. deepeval-3.7.5/deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  84. deepeval-3.7.4/deepeval/optimization/simba/loop.py → deepeval-3.7.5/deepeval/optimizer/algorithms/simba/simba.py +128 -112
  85. {deepeval-3.7.4/deepeval/optimization → deepeval-3.7.5/deepeval/optimizer}/configs.py +5 -8
  86. deepeval-3.7.4/deepeval/optimization/policies/selection.py → deepeval-3.7.5/deepeval/optimizer/policies.py +63 -2
  87. deepeval-3.7.5/deepeval/optimizer/prompt_optimizer.py +263 -0
  88. deepeval-3.7.5/deepeval/optimizer/rewriter/__init__.py +5 -0
  89. deepeval-3.7.5/deepeval/optimizer/rewriter/rewriter.py +124 -0
  90. deepeval-3.7.5/deepeval/optimizer/rewriter/utils.py +214 -0
  91. deepeval-3.7.5/deepeval/optimizer/scorer/__init__.py +5 -0
  92. deepeval-3.7.5/deepeval/optimizer/scorer/base.py +86 -0
  93. deepeval-3.7.5/deepeval/optimizer/scorer/scorer.py +316 -0
  94. deepeval-3.7.5/deepeval/optimizer/scorer/utils.py +30 -0
  95. deepeval-3.7.5/deepeval/optimizer/types.py +148 -0
  96. {deepeval-3.7.4/deepeval/optimization → deepeval-3.7.5/deepeval/optimizer}/utils.py +47 -165
  97. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/prompt/prompt.py +5 -9
  98. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/__init__.py +1 -3
  99. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/api.py +12 -10
  100. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/conversational_test_case.py +19 -1
  101. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/llm_test_case.py +152 -1
  102. deepeval-3.7.5/deepeval/test_case/utils.py +20 -0
  103. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/api.py +15 -14
  104. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/test_run.py +3 -3
  105. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/patchers.py +9 -4
  106. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/tracing.py +2 -2
  107. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/utils.py +65 -0
  108. {deepeval-3.7.4 → deepeval-3.7.5}/pyproject.toml +4 -4
  109. deepeval-3.7.4/deepeval/_version.py +0 -1
  110. deepeval-3.7.4/deepeval/integrations/pydantic_ai/agent.py +0 -21
  111. deepeval-3.7.4/deepeval/metrics/answer_relevancy/template.py +0 -110
  112. deepeval-3.7.4/deepeval/metrics/contextual_precision/template.py +0 -84
  113. deepeval-3.7.4/deepeval/metrics/contextual_recall/template.py +0 -75
  114. deepeval-3.7.4/deepeval/metrics/contextual_relevancy/template.py +0 -77
  115. deepeval-3.7.4/deepeval/metrics/faithfulness/template.py +0 -140
  116. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/__init__.py +0 -24
  117. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  118. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  119. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  120. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  121. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  122. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  123. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  124. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  125. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  126. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  127. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  128. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  129. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  130. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
  131. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  132. deepeval-3.7.4/deepeval/models/llms/azure_model.py +0 -299
  133. deepeval-3.7.4/deepeval/models/llms/ollama_model.py +0 -114
  134. deepeval-3.7.4/deepeval/models/mlllms/__init__.py +0 -4
  135. deepeval-3.7.4/deepeval/models/mlllms/gemini_model.py +0 -313
  136. deepeval-3.7.4/deepeval/models/mlllms/openai_model.py +0 -309
  137. deepeval-3.7.4/deepeval/optimization/__init__.py +0 -13
  138. deepeval-3.7.4/deepeval/optimization/adapters/__init__.py +0 -2
  139. deepeval-3.7.4/deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  140. deepeval-3.7.4/deepeval/optimization/aggregates.py +0 -14
  141. deepeval-3.7.4/deepeval/optimization/copro/configs.py +0 -31
  142. deepeval-3.7.4/deepeval/optimization/gepa/__init__.py +0 -7
  143. deepeval-3.7.4/deepeval/optimization/gepa/configs.py +0 -115
  144. deepeval-3.7.4/deepeval/optimization/miprov2/configs.py +0 -134
  145. deepeval-3.7.4/deepeval/optimization/miprov2/loop.py +0 -785
  146. deepeval-3.7.4/deepeval/optimization/mutations/__init__.py +0 -0
  147. deepeval-3.7.4/deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  148. deepeval-3.7.4/deepeval/optimization/policies/__init__.py +0 -16
  149. deepeval-3.7.4/deepeval/optimization/policies/tie_breaker.py +0 -67
  150. deepeval-3.7.4/deepeval/optimization/prompt_optimizer.py +0 -462
  151. deepeval-3.7.4/deepeval/optimization/simba/__init__.py +0 -0
  152. deepeval-3.7.4/deepeval/optimization/simba/configs.py +0 -33
  153. deepeval-3.7.4/deepeval/optimization/types.py +0 -361
  154. deepeval-3.7.4/deepeval/plugins/__init__.py +0 -0
  155. deepeval-3.7.4/deepeval/synthesizer/chunking/__init__.py +0 -0
  156. deepeval-3.7.4/deepeval/test_case/mllm_test_case.py +0 -170
  157. deepeval-3.7.4/deepeval/test_case/utils.py +0 -24
  158. {deepeval-3.7.4 → deepeval-3.7.5}/LICENSE.md +0 -0
  159. {deepeval-3.7.4 → deepeval-3.7.5}/README.md +0 -0
  160. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/__init__.py +0 -0
  161. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/annotation/__init__.py +0 -0
  162. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/annotation/annotation.py +0 -0
  163. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/annotation/api.py +0 -0
  164. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/anthropic/__init__.py +0 -0
  165. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/anthropic/extractors.py +0 -0
  166. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/anthropic/patch.py +0 -0
  167. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/anthropic/utils.py +0 -0
  168. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/__init__.py +0 -0
  169. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/arc/__init__.py +0 -0
  170. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/arc/arc.py +0 -0
  171. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/arc/mode.py +0 -0
  172. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/arc/template.py +0 -0
  173. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/base_benchmark.py +0 -0
  174. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bbq/__init__.py +0 -0
  175. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bbq/bbq.py +0 -0
  176. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bbq/task.py +0 -0
  177. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bbq/template.py +0 -0
  178. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
  179. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
  180. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
  181. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
  182. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
  183. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
  184. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
  185. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
  186. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
  187. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
  188. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
  189. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
  190. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
  191. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
  192. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
  193. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
  194. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
  195. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
  196. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
  197. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
  198. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
  199. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
  200. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
  201. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
  202. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
  203. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  204. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  205. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  206. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
  207. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
  208. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
  209. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
  210. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
  211. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
  212. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
  213. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
  214. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
  215. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
  216. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
  217. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
  218. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
  219. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
  220. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
  221. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
  222. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
  223. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
  224. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
  225. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
  226. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
  227. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
  228. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
  229. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
  230. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
  231. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  232. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  233. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  234. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
  235. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
  236. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
  237. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
  238. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bool_q/__init__.py +0 -0
  239. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
  240. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/bool_q/template.py +0 -0
  241. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/drop/__init__.py +0 -0
  242. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/drop/drop.py +0 -0
  243. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/drop/task.py +0 -0
  244. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/drop/template.py +0 -0
  245. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
  246. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
  247. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
  248. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
  249. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
  250. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
  251. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/gsm8k/template.py +0 -0
  252. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
  253. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
  254. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/hellaswag/task.py +0 -0
  255. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/hellaswag/template.py +0 -0
  256. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/human_eval/__init__.py +0 -0
  257. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
  258. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/human_eval/task.py +0 -0
  259. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/human_eval/template.py +0 -0
  260. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/ifeval/__init__.py +0 -0
  261. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
  262. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/ifeval/template.py +0 -0
  263. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/lambada/__init__.py +0 -0
  264. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/lambada/lambada.py +0 -0
  265. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/lambada/template.py +0 -0
  266. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
  267. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
  268. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/logi_qa/task.py +0 -0
  269. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/logi_qa/template.py +0 -0
  270. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/math_qa/__init__.py +0 -0
  271. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
  272. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/math_qa/task.py +0 -0
  273. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/math_qa/template.py +0 -0
  274. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/mmlu/__init__.py +0 -0
  275. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
  276. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/mmlu/task.py +0 -0
  277. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/mmlu/template.py +0 -0
  278. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/modes/__init__.py +0 -0
  279. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/results.py +0 -0
  280. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/schema.py +0 -0
  281. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/squad/__init__.py +0 -0
  282. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/squad/squad.py +0 -0
  283. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/squad/task.py +0 -0
  284. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/squad/template.py +0 -0
  285. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/tasks/__init__.py +0 -0
  286. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
  287. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
  288. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/truthful_qa/task.py +0 -0
  289. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/truthful_qa/template.py +0 -0
  290. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
  291. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/utils.py +0 -0
  292. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/winogrande/__init__.py +0 -0
  293. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/winogrande/template.py +0 -0
  294. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
  295. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/__init__.py +0 -0
  296. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/dotenv_handler.py +0 -0
  297. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/main.py +0 -0
  298. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/server.py +0 -0
  299. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/test.py +0 -0
  300. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/types.py +0 -0
  301. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/cli/utils.py +0 -0
  302. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/confident/__init__.py +0 -0
  303. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/confident/api.py +0 -0
  304. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/confident/types.py +0 -0
  305. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/config/__init__.py +0 -0
  306. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/config/logging.py +0 -0
  307. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/config/settings.py +0 -0
  308. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/config/settings_manager.py +0 -0
  309. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/config/utils.py +0 -0
  310. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/constants.py +0 -0
  311. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/contextvars.py +0 -0
  312. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/__init__.py +0 -0
  313. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/api.py +0 -0
  314. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/dataset.py +0 -0
  315. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/test_run_tracer.py +0 -0
  316. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/types.py +0 -0
  317. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/dataset/utils.py +0 -0
  318. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/errors.py +0 -0
  319. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/__init__.py +0 -0
  320. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/api.py +0 -0
  321. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/compare.py +0 -0
  322. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/configs.py +0 -0
  323. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/evaluate/types.py +0 -0
  324. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/__init__.py +0 -0
  325. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/crewai/__init__.py +0 -0
  326. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/crewai/handler.py +0 -0
  327. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/crewai/subs.py +0 -0
  328. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/crewai/tool.py +0 -0
  329. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/crewai/wrapper.py +0 -0
  330. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/hugging_face/__init__.py +0 -0
  331. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/hugging_face/callback.py +0 -0
  332. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
  333. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
  334. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/hugging_face/utils.py +0 -0
  335. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/langchain/__init__.py +0 -0
  336. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/langchain/callback.py +0 -0
  337. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/langchain/patch.py +0 -0
  338. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/langchain/utils.py +0 -0
  339. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/llama_index/__init__.py +0 -0
  340. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/llama_index/handler.py +0 -0
  341. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/llama_index/utils.py +0 -0
  342. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
  343. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/pydantic_ai/otel.py +0 -0
  344. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
  345. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/key_handler.py +0 -0
  346. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
  347. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/answer_relevancy/schema.py +0 -0
  348. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/api.py +0 -0
  349. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
  350. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
  351. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/arena_g_eval/schema.py +0 -0
  352. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/arena_g_eval/template.py +0 -0
  353. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/arena_g_eval/utils.py +0 -0
  354. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/argument_correctness/__init__.py +0 -0
  355. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
  356. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/argument_correctness/schema.py +0 -0
  357. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/argument_correctness/template.py +0 -0
  358. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/bias/__init__.py +0 -0
  359. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/bias/bias.py +0 -0
  360. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/bias/schema.py +0 -0
  361. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/bias/template.py +0 -0
  362. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_precision/__init__.py +0 -0
  363. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_precision/schema.py +0 -0
  364. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_recall/__init__.py +0 -0
  365. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_recall/schema.py +0 -0
  366. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
  367. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
  368. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
  369. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
  370. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversation_completeness/schema.py +0 -0
  371. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversation_completeness/template.py +0 -0
  372. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_dag/__init__.py +0 -0
  373. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
  374. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_dag/nodes.py +0 -0
  375. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_dag/templates.py +0 -0
  376. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
  377. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
  378. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
  379. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/conversational_g_eval/template.py +0 -0
  380. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/__init__.py +0 -0
  381. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/dag.py +0 -0
  382. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/graph.py +0 -0
  383. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/nodes.py +0 -0
  384. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/schema.py +0 -0
  385. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/dag/utils.py +0 -0
  386. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/exact_match/__init__.py +0 -0
  387. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/exact_match/exact_match.py +0 -0
  388. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/faithfulness/__init__.py +0 -0
  389. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/g_eval/__init__.py +0 -0
  390. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/g_eval/g_eval.py +0 -0
  391. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/g_eval/schema.py +0 -0
  392. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/g_eval/template.py +0 -0
  393. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
  394. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/goal_accuracy/goal_accuracy.py +0 -0
  395. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/goal_accuracy/schema.py +0 -0
  396. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/goal_accuracy/template.py +0 -0
  397. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/hallucination/__init__.py +0 -0
  398. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/hallucination/hallucination.py +0 -0
  399. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/hallucination/schema.py +0 -0
  400. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/hallucination/template.py +0 -0
  401. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/json_correctness/__init__.py +0 -0
  402. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
  403. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/json_correctness/schema.py +0 -0
  404. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/json_correctness/template.py +0 -0
  405. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
  406. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
  407. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/knowledge_retention/schema.py +0 -0
  408. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/knowledge_retention/template.py +0 -0
  409. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp/__init__.py +0 -0
  410. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
  411. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
  412. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp/schema.py +0 -0
  413. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp/template.py +0 -0
  414. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
  415. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
  416. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
  417. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/mcp_use_metric/template.py +0 -0
  418. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/misuse/__init__.py +0 -0
  419. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/misuse/misuse.py +0 -0
  420. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/misuse/schema.py +0 -0
  421. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/misuse/template.py +0 -0
  422. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
  423. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
  424. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
  425. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
  426. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
  427. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
  428. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
  429. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
  430. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
  431. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
  432. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
  433. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
  434. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy → deepeval-3.7.5/deepeval/metrics/multimodal_metrics/multimodal_g_eval}/__init__.py +0 -0
  435. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
  436. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision → deepeval-3.7.5/deepeval/metrics/multimodal_metrics/text_to_image}/__init__.py +0 -0
  437. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
  438. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
  439. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/non_advice/__init__.py +0 -0
  440. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/non_advice/non_advice.py +0 -0
  441. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/non_advice/schema.py +0 -0
  442. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/non_advice/template.py +0 -0
  443. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall → deepeval-3.7.5/deepeval/metrics/pattern_match}/__init__.py +0 -0
  444. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/pattern_match/pattern_match.py +0 -0
  445. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/pii_leakage/__init__.py +0 -0
  446. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
  447. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/pii_leakage/schema.py +0 -0
  448. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/pii_leakage/template.py +0 -0
  449. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_adherence/__init__.py +0 -0
  450. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_adherence/plan_adherence.py +0 -0
  451. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_adherence/schema.py +0 -0
  452. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_adherence/template.py +0 -0
  453. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_quality/__init__.py +0 -0
  454. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_quality/plan_quality.py +0 -0
  455. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_quality/schema.py +0 -0
  456. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/plan_quality/template.py +0 -0
  457. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy → deepeval-3.7.5/deepeval/metrics/prompt_alignment}/__init__.py +0 -0
  458. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
  459. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/prompt_alignment/schema.py +0 -0
  460. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/prompt_alignment/template.py +0 -0
  461. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness → deepeval-3.7.5/deepeval/metrics/role_adherence}/__init__.py +0 -0
  462. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
  463. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_adherence/schema.py +0 -0
  464. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_adherence/template.py +0 -0
  465. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_violation/__init__.py +0 -0
  466. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_violation/role_violation.py +0 -0
  467. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_violation/schema.py +0 -0
  468. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/role_violation/template.py +0 -0
  469. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/step_efficiency/__init__.py +0 -0
  470. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/step_efficiency/schema.py +0 -0
  471. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/step_efficiency/step_efficiency.py +0 -0
  472. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/step_efficiency/template.py +0 -0
  473. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/summarization/__init__.py +0 -0
  474. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/summarization/schema.py +0 -0
  475. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/summarization/summarization.py +0 -0
  476. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/summarization/template.py +0 -0
  477. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_g_eval → deepeval-3.7.5/deepeval/metrics/task_completion}/__init__.py +0 -0
  478. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/task_completion/schema.py +0 -0
  479. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/task_completion/task_completion.py +0 -0
  480. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/task_completion/template.py +0 -0
  481. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness → deepeval-3.7.5/deepeval/metrics/tool_correctness}/__init__.py +0 -0
  482. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_correctness/schema.py +0 -0
  483. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_correctness/template.py +0 -0
  484. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_use/__init__.py +0 -0
  485. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_use/schema.py +0 -0
  486. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_use/template.py +0 -0
  487. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/tool_use/tool_use.py +0 -0
  488. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/topic_adherence/__init__.py +0 -0
  489. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/topic_adherence/schema.py +0 -0
  490. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/topic_adherence/template.py +0 -0
  491. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/topic_adherence/topic_adherence.py +0 -0
  492. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/toxicity/__init__.py +0 -0
  493. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/toxicity/schema.py +0 -0
  494. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/toxicity/template.py +0 -0
  495. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/toxicity/toxicity.py +0 -0
  496. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/text_to_image → deepeval-3.7.5/deepeval/metrics/turn_contextual_precision}/__init__.py +0 -0
  497. {deepeval-3.7.4/deepeval/metrics/pattern_match → deepeval-3.7.5/deepeval/metrics/turn_contextual_recall}/__init__.py +0 -0
  498. {deepeval-3.7.4/deepeval/metrics/prompt_alignment → deepeval-3.7.5/deepeval/metrics/turn_contextual_relevancy}/__init__.py +0 -0
  499. {deepeval-3.7.4/deepeval/metrics/role_adherence → deepeval-3.7.5/deepeval/metrics/turn_faithfulness}/__init__.py +0 -0
  500. {deepeval-3.7.4/deepeval/metrics/task_completion → deepeval-3.7.5/deepeval/metrics/turn_relevancy}/__init__.py +0 -0
  501. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/turn_relevancy/schema.py +0 -0
  502. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/turn_relevancy/template.py +0 -0
  503. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
  504. {deepeval-3.7.4/deepeval/metrics/tool_correctness → deepeval-3.7.5/deepeval/model_integrations}/__init__.py +0 -0
  505. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/model_integrations/types.py +0 -0
  506. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/model_integrations/utils.py +0 -0
  507. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/_summac_model.py +0 -0
  508. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/answer_relevancy_model.py +0 -0
  509. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/detoxify_model.py +0 -0
  510. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/hallucination_model.py +0 -0
  511. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/llms/__init__.py +0 -0
  512. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/summac_model.py +0 -0
  513. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/models/unbias_model.py +0 -0
  514. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai/__init__.py +0 -0
  515. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai/extractors.py +0 -0
  516. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai/patch.py +0 -0
  517. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai/utils.py +0 -0
  518. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/__init__.py +0 -0
  519. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/agent.py +0 -0
  520. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/callback_handler.py +0 -0
  521. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/extractors.py +0 -0
  522. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/patch.py +0 -0
  523. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/openai_agents/runner.py +0 -0
  524. {deepeval-3.7.4/deepeval/optimization → deepeval-3.7.5/deepeval/optimizer/algorithms}/simba/types.py +0 -0
  525. {deepeval-3.7.4/deepeval/metrics/turn_relevancy → deepeval-3.7.5/deepeval/plugins}/__init__.py +0 -0
  526. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/plugins/plugin.py +0 -0
  527. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/progress_context.py +0 -0
  528. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/prompt/__init__.py +0 -0
  529. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/prompt/api.py +0 -0
  530. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/prompt/utils.py +0 -0
  531. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/py.typed +0 -0
  532. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/red_teaming/README.md +0 -0
  533. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/scorer/__init__.py +0 -0
  534. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/scorer/scorer.py +0 -0
  535. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/simulator/__init__.py +0 -0
  536. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/simulator/conversation_simulator.py +0 -0
  537. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/simulator/schema.py +0 -0
  538. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/simulator/template.py +0 -0
  539. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/singleton.py +0 -0
  540. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/__init__.py +0 -0
  541. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/base_synthesizer.py +0 -0
  542. {deepeval-3.7.4/deepeval/model_integrations → deepeval-3.7.5/deepeval/synthesizer/chunking}/__init__.py +0 -0
  543. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/chunking/context_generator.py +0 -0
  544. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
  545. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/config.py +0 -0
  546. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/schema.py +0 -0
  547. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/synthesizer.py +0 -0
  548. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/templates/__init__.py +0 -0
  549. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/templates/template.py +0 -0
  550. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/templates/template_extraction.py +0 -0
  551. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/templates/template_prompt.py +0 -0
  552. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/types.py +0 -0
  553. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/synthesizer/utils.py +0 -0
  554. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/telemetry.py +0 -0
  555. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/arena_test_case.py +0 -0
  556. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_case/mcp.py +0 -0
  557. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/__init__.py +0 -0
  558. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/cache.py +0 -0
  559. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/hooks.py +0 -0
  560. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/test_run/hyperparameters.py +0 -0
  561. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/__init__.py +0 -0
  562. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/api.py +0 -0
  563. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/context.py +0 -0
  564. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/offline_evals/__init__.py +0 -0
  565. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/offline_evals/api.py +0 -0
  566. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/offline_evals/span.py +0 -0
  567. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/offline_evals/thread.py +0 -0
  568. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/offline_evals/trace.py +0 -0
  569. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/otel/__init__.py +0 -0
  570. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/otel/exporter.py +0 -0
  571. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/otel/test_exporter.py +0 -0
  572. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/otel/utils.py +0 -0
  573. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/perf_epoch_bridge.py +0 -0
  574. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/trace_context.py +0 -0
  575. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/trace_test_manager.py +0 -0
  576. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/types.py +0 -0
  577. {deepeval-3.7.4 → deepeval-3.7.5}/deepeval/tracing/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.7.4
3
+ Version: 3.7.5
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -13,13 +13,10 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Requires-Dist: aiohttp
16
- Requires-Dist: anthropic
17
16
  Requires-Dist: click (>=8.0.0,<8.3.0)
18
- Requires-Dist: google-genai (>=1.9.0,<2.0.0)
19
17
  Requires-Dist: grpcio (>=1.67.1,<2.0.0)
20
18
  Requires-Dist: jinja2
21
19
  Requires-Dist: nest_asyncio
22
- Requires-Dist: ollama
23
20
  Requires-Dist: openai
24
21
  Requires-Dist: opentelemetry-api (>=1.24.0,<2.0.0)
25
22
  Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.24.0,<2.0.0)
@@ -0,0 +1 @@
1
+ __version__: str = "3.7.5"
@@ -1,6 +1,6 @@
1
- from pydantic import BaseModel, Field, PrivateAttr
1
+ from pydantic import BaseModel, Field, PrivateAttr, model_validator
2
2
  from typing import Optional, Dict, List
3
- from deepeval.test_case import ToolCall, Turn
3
+ from deepeval.test_case import ToolCall, Turn, MLLMImage
4
4
 
5
5
 
6
6
  class Golden(BaseModel):
@@ -32,10 +32,40 @@ class Golden(BaseModel):
32
32
  custom_column_key_values: Optional[Dict[str, str]] = Field(
33
33
  default=None, serialization_alias="customColumnKeyValues"
34
34
  )
35
+ multimodal: bool = Field(False, exclude=True)
35
36
  _dataset_rank: Optional[int] = PrivateAttr(default=None)
36
37
  _dataset_alias: Optional[str] = PrivateAttr(default=None)
37
38
  _dataset_id: Optional[str] = PrivateAttr(default=None)
38
39
 
40
+ @model_validator(mode="after")
41
+ def set_is_multimodal(self):
42
+ import re
43
+
44
+ if self.multimodal is True:
45
+ return self
46
+
47
+ pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
48
+ self.multimodal = (
49
+ any(
50
+ [
51
+ (
52
+ re.search(pattern, self.input) is not None
53
+ if self.input
54
+ else False
55
+ ),
56
+ (
57
+ re.search(pattern, self.actual_output) is not None
58
+ if self.actual_output
59
+ else False
60
+ ),
61
+ ]
62
+ )
63
+ if isinstance(self.input, str)
64
+ else self.multimodal
65
+ )
66
+
67
+ return self
68
+
39
69
 
40
70
  class ConversationalGolden(BaseModel):
41
71
  scenario: str
@@ -55,6 +85,28 @@ class ConversationalGolden(BaseModel):
55
85
  default=None, serialization_alias="customColumnKeyValues"
56
86
  )
57
87
  turns: Optional[List[Turn]] = Field(default=None)
88
+ multimodal: bool = Field(False, exclude=True)
58
89
  _dataset_rank: Optional[int] = PrivateAttr(default=None)
59
90
  _dataset_alias: Optional[str] = PrivateAttr(default=None)
60
91
  _dataset_id: Optional[str] = PrivateAttr(default=None)
92
+
93
+ @model_validator(mode="after")
94
+ def set_is_multimodal(self):
95
+ import re
96
+
97
+ if self.multimodal is True:
98
+ return self
99
+
100
+ pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
101
+ self.multimodal = (
102
+ any(
103
+ [
104
+ re.search(pattern, turn.content) is not None
105
+ for turn in self.turns
106
+ ]
107
+ )
108
+ if self.turns
109
+ else self.multimodal
110
+ )
111
+
112
+ return self
@@ -54,7 +54,6 @@ from deepeval.metrics.indicator import (
54
54
  from deepeval.test_case import (
55
55
  LLMTestCase,
56
56
  ConversationalTestCase,
57
- MLLMTestCase,
58
57
  )
59
58
  from deepeval.test_run import (
60
59
  global_test_run_manager,
@@ -71,9 +70,7 @@ from deepeval.evaluate.execute import (
71
70
 
72
71
 
73
72
  def assert_test(
74
- test_case: Optional[
75
- Union[LLMTestCase, ConversationalTestCase, MLLMTestCase]
76
- ] = None,
73
+ test_case: Optional[Union[LLMTestCase, ConversationalTestCase]] = None,
77
74
  metrics: Optional[
78
75
  Union[
79
76
  List[BaseMetric],
@@ -175,7 +172,7 @@ def assert_test(
175
172
  try:
176
173
  if not metric_data.success:
177
174
  failed_metrics_data.append(metric_data)
178
- except:
175
+ except Exception:
179
176
  failed_metrics_data.append(metric_data)
180
177
 
181
178
  failed_metrics_str = ", ".join(
@@ -188,9 +185,7 @@ def assert_test(
188
185
 
189
186
 
190
187
  def evaluate(
191
- test_cases: Union[
192
- List[LLMTestCase], List[ConversationalTestCase], List[MLLMTestCase]
193
- ],
188
+ test_cases: Union[List[LLMTestCase], List[ConversationalTestCase]],
194
189
  metrics: Optional[
195
190
  Union[
196
191
  List[BaseMetric],
@@ -272,6 +267,19 @@ def evaluate(
272
267
  test_run.hyperparameters = process_hyperparameters(hyperparameters)
273
268
  test_run.prompts = process_prompts(hyperparameters)
274
269
  global_test_run_manager.save_test_run(TEMP_FILE_PATH)
270
+
271
+ # In CLI mode (`deepeval test run`), the CLI owns finalization and will
272
+ # call `wrap_up_test_run()` once after pytest finishes. Finalizing here
273
+ # as well would double finalize the run and consequently result in
274
+ # duplicate uploads / local saves and temp file races, so only
275
+ # do it when we're NOT in CLI mode.
276
+ if get_is_running_deepeval():
277
+ return EvaluationResult(
278
+ test_results=test_results,
279
+ confident_link=None,
280
+ test_run_id=None,
281
+ )
282
+
275
283
  res = global_test_run_manager.wrap_up_test_run(
276
284
  run_duration, display_table=False
277
285
  )
@@ -58,6 +58,13 @@ from deepeval.metrics import (
58
58
  BaseConversationalMetric,
59
59
  BaseMultimodalMetric,
60
60
  TaskCompletionMetric,
61
+ # RAG metrics that support both single-turn and multimodal
62
+ ContextualPrecisionMetric,
63
+ ContextualRecallMetric,
64
+ ContextualRelevancyMetric,
65
+ AnswerRelevancyMetric,
66
+ FaithfulnessMetric,
67
+ ToolCorrectnessMetric,
61
68
  )
62
69
  from deepeval.metrics.indicator import (
63
70
  measure_metrics_with_indicator,
@@ -70,7 +77,6 @@ from deepeval.models.retry_policy import (
70
77
  from deepeval.test_case import (
71
78
  LLMTestCase,
72
79
  ConversationalTestCase,
73
- MLLMTestCase,
74
80
  )
75
81
  from deepeval.test_case.api import create_api_test_case
76
82
  from deepeval.test_run import (
@@ -110,6 +116,15 @@ from deepeval.test_run.hyperparameters import (
110
116
 
111
117
  logger = logging.getLogger(__name__)
112
118
 
119
+ MLLM_SUPPORTED_METRICS = [
120
+ ContextualPrecisionMetric,
121
+ ContextualRecallMetric,
122
+ ContextualRelevancyMetric,
123
+ AnswerRelevancyMetric,
124
+ FaithfulnessMetric,
125
+ ToolCorrectnessMetric,
126
+ ]
127
+
113
128
 
114
129
  def _skip_metrics_for_error(
115
130
  span: Optional[BaseSpan] = None,
@@ -263,9 +278,7 @@ async def _await_with_outer_deadline(obj, *args, timeout: float, **kwargs):
263
278
 
264
279
 
265
280
  def execute_test_cases(
266
- test_cases: Union[
267
- List[LLMTestCase], List[ConversationalTestCase], List[MLLMTestCase]
268
- ],
281
+ test_cases: Union[List[LLMTestCase], List[ConversationalTestCase]],
269
282
  metrics: Union[
270
283
  List[BaseMetric],
271
284
  List[BaseConversationalMetric],
@@ -307,6 +320,8 @@ def execute_test_cases(
307
320
  metric.async_mode = False
308
321
  if isinstance(metric, BaseMetric):
309
322
  llm_metrics.append(metric)
323
+ if type(metric) in MLLM_SUPPORTED_METRICS:
324
+ mllm_metrics.append(metric)
310
325
  elif isinstance(metric, BaseConversationalMetric):
311
326
  conversational_metrics.append(metric)
312
327
  elif isinstance(metric, BaseMultimodalMetric):
@@ -325,12 +340,12 @@ def execute_test_cases(
325
340
  )
326
341
  for i, test_case in enumerate(test_cases):
327
342
  # skip what we know we won't run
328
- if isinstance(test_case, LLMTestCase):
343
+ if isinstance(test_case, LLMTestCase) and not test_case.multimodal:
329
344
  if not llm_metrics:
330
345
  update_pbar(progress, pbar_id)
331
346
  continue
332
347
  per_case_total = len(llm_metrics)
333
- elif isinstance(test_case, MLLMTestCase):
348
+ elif isinstance(test_case, LLMTestCase) and test_case.multimodal:
334
349
  if not mllm_metrics:
335
350
  update_pbar(progress, pbar_id)
336
351
  continue
@@ -349,10 +364,16 @@ def execute_test_cases(
349
364
 
350
365
  metrics_for_case = (
351
366
  llm_metrics
352
- if isinstance(test_case, LLMTestCase)
367
+ if (
368
+ isinstance(test_case, LLMTestCase)
369
+ and not test_case.multimodal
370
+ )
353
371
  else (
354
372
  mllm_metrics
355
- if isinstance(test_case, MLLMTestCase)
373
+ if (
374
+ isinstance(test_case, LLMTestCase)
375
+ and test_case.multimodal
376
+ )
356
377
  else conversational_metrics
357
378
  )
358
379
  )
@@ -360,10 +381,16 @@ def execute_test_cases(
360
381
  test_case=test_case,
361
382
  index=(
362
383
  llm_test_case_count + 1
363
- if isinstance(test_case, LLMTestCase)
384
+ if (
385
+ isinstance(test_case, LLMTestCase)
386
+ and not test_case.multimodal
387
+ )
364
388
  else (
365
389
  mllm_test_case_count + 1
366
- if isinstance(test_case, MLLMTestCase)
390
+ if (
391
+ isinstance(test_case, LLMTestCase)
392
+ and test_case.multimodal
393
+ )
367
394
  else conversational_test_case_count + 1
368
395
  )
369
396
  ),
@@ -383,7 +410,10 @@ def execute_test_cases(
383
410
  for metric in metrics:
384
411
  metric.error = None # Reset metric error
385
412
 
386
- if isinstance(test_case, LLMTestCase):
413
+ if (
414
+ isinstance(test_case, LLMTestCase)
415
+ and not test_case.multimodal
416
+ ):
387
417
  llm_test_case_count += 1
388
418
  cached_test_case = None
389
419
  if cache_config.use_cache:
@@ -436,7 +466,10 @@ def execute_test_cases(
436
466
  update_pbar(progress, pbar_test_case_id)
437
467
 
438
468
  # No caching and not sending test cases to Confident AI for multimodal metrics yet
439
- elif isinstance(test_case, MLLMTestCase):
469
+ elif (
470
+ isinstance(test_case, LLMTestCase)
471
+ and test_case.multimodal
472
+ ):
440
473
  mllm_test_case_count += 1
441
474
  for metric in mllm_metrics:
442
475
  current_index = index_of[id(metric)]
@@ -560,9 +593,7 @@ def execute_test_cases(
560
593
 
561
594
 
562
595
  async def a_execute_test_cases(
563
- test_cases: Union[
564
- List[LLMTestCase], List[ConversationalTestCase], List[MLLMTestCase]
565
- ],
596
+ test_cases: Union[List[LLMTestCase], List[ConversationalTestCase]],
566
597
  metrics: Union[
567
598
  List[BaseMetric],
568
599
  List[BaseConversationalMetric],
@@ -605,6 +636,8 @@ async def a_execute_test_cases(
605
636
  for metric in metrics:
606
637
  if isinstance(metric, BaseMetric):
607
638
  llm_metrics.append(metric)
639
+ if type(metric) in MLLM_SUPPORTED_METRICS:
640
+ mllm_metrics.append(metric)
608
641
  elif isinstance(metric, BaseMultimodalMetric):
609
642
  mllm_metrics.append(metric)
610
643
  elif isinstance(metric, BaseConversationalMetric):
@@ -613,7 +646,7 @@ async def a_execute_test_cases(
613
646
  llm_test_case_counter = -1
614
647
  mllm_test_case_counter = -1
615
648
  conversational_test_case_counter = -1
616
- test_results: List[Union[TestResult, MLLMTestCase]] = []
649
+ test_results: List[Union[TestResult, LLMTestCase]] = []
617
650
  tasks = []
618
651
 
619
652
  if display_config.show_indicator and _use_bar_indicator:
@@ -632,7 +665,10 @@ async def a_execute_test_cases(
632
665
  with progress:
633
666
  for test_case in test_cases:
634
667
  with capture_evaluation_run("test case"):
635
- if isinstance(test_case, LLMTestCase):
668
+ if (
669
+ isinstance(test_case, LLMTestCase)
670
+ and not test_case.multimodal
671
+ ):
636
672
  if len(llm_metrics) == 0:
637
673
  update_pbar(progress, pbar_id)
638
674
  continue
@@ -660,7 +696,10 @@ async def a_execute_test_cases(
660
696
  )
661
697
  tasks.append(asyncio.create_task(task))
662
698
 
663
- elif isinstance(test_case, MLLMTestCase):
699
+ elif (
700
+ isinstance(test_case, LLMTestCase)
701
+ and test_case.multimodal
702
+ ):
664
703
  mllm_test_case_counter += 1
665
704
  copied_multimodal_metrics: List[
666
705
  BaseMultimodalMetric
@@ -724,7 +763,10 @@ async def a_execute_test_cases(
724
763
  else:
725
764
  for test_case in test_cases:
726
765
  with capture_evaluation_run("test case"):
727
- if isinstance(test_case, LLMTestCase):
766
+ if (
767
+ isinstance(test_case, LLMTestCase)
768
+ and not test_case.multimodal
769
+ ):
728
770
  if len(llm_metrics) == 0:
729
771
  continue
730
772
  llm_test_case_counter += 1
@@ -772,7 +814,9 @@ async def a_execute_test_cases(
772
814
  )
773
815
  tasks.append(asyncio.create_task((task)))
774
816
 
775
- elif isinstance(test_case, MLLMTestCase):
817
+ elif (
818
+ isinstance(test_case, LLMTestCase) and test_case.multimodal
819
+ ):
776
820
  mllm_test_case_counter += 1
777
821
  copied_multimodal_metrics: List[BaseMultimodalMetric] = (
778
822
  copy_metrics(mllm_metrics)
@@ -815,7 +859,7 @@ async def _a_execute_llm_test_cases(
815
859
  metrics: List[BaseMetric],
816
860
  test_case: LLMTestCase,
817
861
  test_run_manager: TestRunManager,
818
- test_results: List[Union[TestResult, MLLMTestCase]],
862
+ test_results: List[Union[TestResult, LLMTestCase]],
819
863
  count: int,
820
864
  test_run: TestRun,
821
865
  ignore_errors: bool,
@@ -934,9 +978,9 @@ async def _a_execute_llm_test_cases(
934
978
 
935
979
  async def _a_execute_mllm_test_cases(
936
980
  metrics: List[BaseMultimodalMetric],
937
- test_case: MLLMTestCase,
981
+ test_case: LLMTestCase,
938
982
  test_run_manager: TestRunManager,
939
- test_results: List[Union[TestResult, MLLMTestCase]],
983
+ test_results: List[Union[TestResult, LLMTestCase]],
940
984
  count: int,
941
985
  ignore_errors: bool,
942
986
  skip_on_missing_params: bool,
@@ -1013,7 +1057,7 @@ async def _a_execute_conversational_test_cases(
1013
1057
  ],
1014
1058
  test_case: ConversationalTestCase,
1015
1059
  test_run_manager: TestRunManager,
1016
- test_results: List[Union[TestResult, MLLMTestCase]],
1060
+ test_results: List[Union[TestResult, LLMTestCase]],
1017
1061
  count: int,
1018
1062
  ignore_errors: bool,
1019
1063
  skip_on_missing_params: bool,
@@ -1776,7 +1820,7 @@ async def a_execute_agentic_test_cases(
1776
1820
  async def _a_execute_agentic_test_case(
1777
1821
  golden: Golden,
1778
1822
  test_run_manager: TestRunManager,
1779
- test_results: List[Union[TestResult, MLLMTestCase]],
1823
+ test_results: List[Union[TestResult, LLMTestCase]],
1780
1824
  count: int,
1781
1825
  verbose_mode: Optional[bool],
1782
1826
  ignore_errors: bool,
@@ -3205,7 +3249,7 @@ async def _evaluate_test_case_pairs(
3205
3249
 
3206
3250
  def _execute_metric(
3207
3251
  metric: BaseMetric,
3208
- test_case: Union[LLMTestCase, ConversationalTestCase, MLLMTestCase],
3252
+ test_case: Union[LLMTestCase, ConversationalTestCase],
3209
3253
  show_metric_indicator: bool,
3210
3254
  in_component: bool,
3211
3255
  error_config: ErrorConfig,
@@ -16,7 +16,6 @@ from deepeval.metrics import (
16
16
  from deepeval.test_case import (
17
17
  LLMTestCase,
18
18
  ConversationalTestCase,
19
- MLLMTestCase,
20
19
  )
21
20
  from deepeval.test_run import (
22
21
  LLMApiTestCase,
@@ -129,17 +128,14 @@ def create_test_result(
129
128
  turns=api_test_case.turns,
130
129
  )
131
130
  else:
132
- multimodal = (
133
- api_test_case.multimodal_input is not None
134
- and api_test_case.multimodal_input_actual_output is not None
135
- )
131
+ multimodal = api_test_case.images_mapping
136
132
  if multimodal:
137
133
  return TestResult(
138
134
  name=name,
139
135
  success=api_test_case.success,
140
136
  metrics_data=api_test_case.metrics_data,
141
- input=api_test_case.multimodal_input,
142
- actual_output=api_test_case.multimodal_input_actual_output,
137
+ input=api_test_case.input,
138
+ actual_output=api_test_case.actual_output,
143
139
  conversational=False,
144
140
  multimodal=True,
145
141
  additional_metadata=api_test_case.additional_metadata,
@@ -222,9 +218,9 @@ def validate_assert_test_inputs(
222
218
  )
223
219
 
224
220
  if test_case and metrics:
225
- if isinstance(test_case, LLMTestCase) and not all(
226
- isinstance(metric, BaseMetric) for metric in metrics
227
- ):
221
+ if (
222
+ isinstance(test_case, LLMTestCase) and not test_case.multimodal
223
+ ) and not all(isinstance(metric, BaseMetric) for metric in metrics):
228
224
  raise ValueError(
229
225
  "All 'metrics' for an 'LLMTestCase' must be instances of 'BaseMetric' only."
230
226
  )
@@ -234,11 +230,17 @@ def validate_assert_test_inputs(
234
230
  raise ValueError(
235
231
  "All 'metrics' for an 'ConversationalTestCase' must be instances of 'BaseConversationalMetric' only."
236
232
  )
237
- if isinstance(test_case, MLLMTestCase) and not all(
238
- isinstance(metric, BaseMultimodalMetric) for metric in metrics
233
+ if (
234
+ isinstance(test_case, LLMTestCase) and test_case.multimodal
235
+ ) and not all(
236
+ (
237
+ isinstance(metric, BaseMultimodalMetric)
238
+ or isinstance(metric, BaseMetric)
239
+ )
240
+ for metric in metrics
239
241
  ):
240
242
  raise ValueError(
241
- "All 'metrics' for an 'MLLMTestCase' must be instances of 'BaseMultimodalMetric' only."
243
+ "All 'metrics' for multi-modal LLMTestCase must be instances of 'BaseMultimodalMetric' only."
242
244
  )
243
245
 
244
246
  if not ((golden and observed_callback) or (test_case and metrics)):
@@ -251,9 +253,7 @@ def validate_evaluate_inputs(
251
253
  goldens: Optional[List] = None,
252
254
  observed_callback: Optional[Callable] = None,
253
255
  test_cases: Optional[
254
- Union[
255
- List[LLMTestCase], List[ConversationalTestCase], List[MLLMTestCase]
256
- ]
256
+ Union[List[LLMTestCase], List[ConversationalTestCase]]
257
257
  ] = None,
258
258
  metrics: Optional[
259
259
  Union[
@@ -292,9 +292,10 @@ def validate_evaluate_inputs(
292
292
  if test_cases and metrics:
293
293
  for test_case in test_cases:
294
294
  for metric in metrics:
295
- if isinstance(test_case, LLMTestCase) and not isinstance(
296
- metric, BaseMetric
297
- ):
295
+ if (
296
+ isinstance(test_case, LLMTestCase)
297
+ and not test_case.multimodal
298
+ ) and not isinstance(metric, BaseMetric):
298
299
  raise ValueError(
299
300
  f"Metric {metric.__name__} is not a valid metric for LLMTestCase."
300
301
  )
@@ -305,11 +306,14 @@ def validate_evaluate_inputs(
305
306
  raise ValueError(
306
307
  f"Metric {metric.__name__} is not a valid metric for ConversationalTestCase."
307
308
  )
308
- if isinstance(test_case, MLLMTestCase) and not isinstance(
309
- metric, BaseMultimodalMetric
309
+ if (
310
+ isinstance(test_case, LLMTestCase) and test_case.multimodal
311
+ ) and not (
312
+ isinstance(metric, BaseMultimodalMetric)
313
+ or isinstance(metric, BaseMetric)
310
314
  ):
311
315
  raise ValueError(
312
- f"Metric {metric.__name__} is not a valid metric for MLLMTestCase."
316
+ f"Metric {metric.__name__} is not a valid metric for multi-modal LLMTestCase."
313
317
  )
314
318
 
315
319
 
@@ -0,0 +1,38 @@
1
+ import warnings
2
+ from typing import TYPE_CHECKING, Any
3
+
4
+ try:
5
+ from pydantic_ai.agent import Agent as _BaseAgent
6
+
7
+ is_pydantic_ai_installed = True
8
+ except ImportError:
9
+ is_pydantic_ai_installed = False
10
+
11
+ class _BaseAgent:
12
+ """Dummy fallback so imports don't crash when pydantic-ai is missing."""
13
+
14
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
15
+ # No-op: for compatibility
16
+ pass
17
+
18
+
19
+ if TYPE_CHECKING:
20
+ # For type checkers: use the real Agent if available.
21
+ from pydantic_ai.agent import Agent # type: ignore[unused-ignore]
22
+ else:
23
+ # At runtime we always have some base: real Agent or our dummy.
24
+ # This is just to avoid blow-ups.
25
+ Agent = _BaseAgent
26
+
27
+
28
+ class DeepEvalPydanticAIAgent(Agent):
29
+
30
+ def __init__(self, *args, **kwargs):
31
+ warnings.warn(
32
+ "instrument_pydantic_ai is deprecated and will be removed in a future version. "
33
+ "Please use the new ConfidentInstrumentationSettings instead. Docs: https://www.confident-ai.com/docs/integrations/third-party/pydantic-ai",
34
+ DeprecationWarning,
35
+ stacklevel=2,
36
+ )
37
+
38
+ super().__init__(*args, **kwargs)
@@ -1,40 +1,58 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
2
4
  import logging
3
5
  import os
4
6
  from time import perf_counter
5
- from typing import Literal, Optional, List
7
+ from typing import Any, List, Optional, TYPE_CHECKING
6
8
 
7
9
  from deepeval.config.settings import get_settings
8
10
  from deepeval.confident.api import get_confident_api_key
9
11
  from deepeval.metrics.base_metric import BaseMetric
10
12
  from deepeval.prompt import Prompt
11
13
  from deepeval.tracing.context import current_trace_context
12
- from deepeval.tracing.types import Trace
13
- from deepeval.tracing.otel.utils import to_hex_string
14
- from deepeval.tracing.tracing import trace_manager
15
- from deepeval.tracing.otel.utils import normalize_pydantic_ai_messages
16
14
  from deepeval.tracing.otel.exporter import ConfidentSpanExporter
17
-
15
+ from deepeval.tracing.otel.test_exporter import test_exporter
16
+ from deepeval.tracing.otel.utils import (
17
+ normalize_pydantic_ai_messages,
18
+ to_hex_string,
19
+ )
20
+ from deepeval.tracing.perf_epoch_bridge import init_clock_bridge
21
+ from deepeval.tracing.tracing import trace_manager
22
+ from deepeval.tracing.types import (
23
+ AgentSpan,
24
+ Trace,
25
+ TraceSpanStatus,
26
+ ToolCall,
27
+ )
18
28
 
19
29
  logger = logging.getLogger(__name__)
20
30
 
21
-
22
31
  try:
23
- from pydantic_ai.models.instrumented import InstrumentationSettings
24
- from opentelemetry.sdk.trace import SpanProcessor, TracerProvider
32
+ # Optional dependencies
33
+ from opentelemetry.sdk.trace import (
34
+ ReadableSpan as _ReadableSpan,
35
+ SpanProcessor as _SpanProcessor,
36
+ TracerProvider,
37
+ )
25
38
  from opentelemetry.sdk.trace.export import BatchSpanProcessor
26
39
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
27
40
  OTLPSpanExporter,
28
41
  )
29
- from opentelemetry.sdk.trace import ReadableSpan
42
+ from pydantic_ai.models.instrumented import (
43
+ InstrumentationSettings as _BaseInstrumentationSettings,
44
+ )
30
45
 
31
46
  dependency_installed = True
32
47
  except ImportError as e:
48
+ dependency_installed = False
49
+
50
+ # Preserve previous behavior: only log when verbose mode is enabled.
33
51
  if get_settings().DEEPEVAL_VERBOSE_MODE:
34
52
  if isinstance(e, ModuleNotFoundError):
35
53
  logger.warning(
36
54
  "Optional tracing dependency not installed: %s",
37
- e.name,
55
+ getattr(e, "name", repr(e)),
38
56
  stacklevel=2,
39
57
  )
40
58
  else:
@@ -43,26 +61,47 @@ except ImportError as e:
43
61
  e,
44
62
  stacklevel=2,
45
63
  )
46
- dependency_installed = False
64
+
65
+ # Dummy fallbacks so imports and class definitions don't crash when
66
+ # optional deps are missing. Actual use is still guarded by
67
+ # is_dependency_installed().
68
+ class _BaseInstrumentationSettings:
69
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
70
+ pass
71
+
72
+ class _SpanProcessor:
73
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
74
+ pass
75
+
76
+ def on_start(self, span: Any, parent_context: Any) -> None:
77
+ pass
78
+
79
+ def on_end(self, span: Any) -> None:
80
+ pass
81
+
82
+ class _ReadableSpan:
83
+ pass
47
84
 
48
85
 
49
- def is_dependency_installed():
86
+ def is_dependency_installed() -> bool:
50
87
  if not dependency_installed:
51
88
  raise ImportError(
52
- "Dependencies are not installed. Please install it with `pip install pydantic-ai opentelemetry-sdk opentelemetry-exporter-otlp-proto-http`."
89
+ "Dependencies are not installed. Please install it with "
90
+ "`pip install pydantic-ai opentelemetry-sdk "
91
+ "opentelemetry-exporter-otlp-proto-http`."
53
92
  )
54
93
  return True
55
94
 
56
95
 
57
- from deepeval.tracing.types import AgentSpan
58
- from deepeval.confident.api import get_confident_api_key
59
- from deepeval.prompt import Prompt
60
- from deepeval.tracing.otel.test_exporter import test_exporter
61
- from deepeval.tracing.context import current_trace_context
62
- from deepeval.tracing.types import Trace
63
- from deepeval.tracing.otel.utils import to_hex_string
64
- from deepeval.tracing.types import TraceSpanStatus, ToolCall
65
- from deepeval.tracing.perf_epoch_bridge import init_clock_bridge
96
+ if TYPE_CHECKING:
97
+ # For type checkers, use real types
98
+ from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
99
+ from pydantic_ai.models.instrumented import InstrumentationSettings
100
+ else:
101
+ # At runtime we always have something to subclass / annotate with
102
+ InstrumentationSettings = _BaseInstrumentationSettings
103
+ SpanProcessor = _SpanProcessor
104
+ ReadableSpan = _ReadableSpan
66
105
 
67
106
  # OTLP_ENDPOINT = "http://127.0.0.1:4318/v1/traces"
68
107
  OTLP_ENDPOINT = "https://otel.confident-ai.com/v1/traces"