deepeval 3.7.4__tar.gz → 3.7.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (588)
  1. {deepeval-3.7.4 → deepeval-3.7.6}/PKG-INFO +1 -4
  2. deepeval-3.7.6/deepeval/_version.py +1 -0
  3. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/config/settings.py +35 -1
  4. deepeval-3.7.6/deepeval/dataset/api.py +50 -0
  5. deepeval-3.7.6/deepeval/dataset/golden.py +197 -0
  6. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/evaluate/evaluate.py +16 -11
  7. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/evaluate/execute.py +13 -181
  8. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/evaluate/utils.py +6 -26
  9. deepeval-3.7.6/deepeval/integrations/pydantic_ai/agent.py +38 -0
  10. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/pydantic_ai/instrumentator.py +62 -23
  11. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/key_handler.py +3 -0
  12. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/__init__.py +14 -16
  13. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/answer_relevancy/answer_relevancy.py +118 -116
  14. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/answer_relevancy/template.py +22 -3
  15. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/arena_g_eval/arena_g_eval.py +98 -96
  16. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/arena_g_eval/template.py +17 -1
  17. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/argument_correctness/argument_correctness.py +81 -87
  18. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/argument_correctness/template.py +19 -2
  19. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/base_metric.py +13 -44
  20. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/bias/bias.py +102 -108
  21. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/bias/template.py +14 -2
  22. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/contextual_precision/contextual_precision.py +96 -94
  23. deepeval-3.7.6/deepeval/metrics/contextual_precision/template.py +133 -0
  24. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/contextual_recall/contextual_recall.py +94 -84
  25. deepeval-3.7.6/deepeval/metrics/contextual_recall/template.py +126 -0
  26. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +86 -84
  27. deepeval-3.7.6/deepeval/metrics/contextual_relevancy/template.py +106 -0
  28. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversation_completeness/conversation_completeness.py +101 -119
  29. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversation_completeness/template.py +23 -3
  30. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversational_dag/conversational_dag.py +12 -8
  31. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversational_dag/nodes.py +66 -123
  32. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversational_dag/templates.py +16 -0
  33. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +47 -66
  34. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/dag/dag.py +10 -0
  35. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/dag/nodes.py +63 -126
  36. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/dag/templates.py +16 -2
  37. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/exact_match/exact_match.py +9 -1
  38. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/faithfulness/faithfulness.py +138 -149
  39. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/faithfulness/schema.py +1 -1
  40. deepeval-3.7.6/deepeval/metrics/faithfulness/template.py +225 -0
  41. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/g_eval/g_eval.py +87 -78
  42. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/g_eval/template.py +18 -1
  43. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/g_eval/utils.py +7 -6
  44. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/goal_accuracy/goal_accuracy.py +91 -76
  45. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/goal_accuracy/template.py +21 -3
  46. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/hallucination/hallucination.py +60 -75
  47. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/hallucination/template.py +13 -0
  48. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/indicator.py +7 -10
  49. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/json_correctness/json_correctness.py +40 -38
  50. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/json_correctness/template.py +10 -0
  51. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/knowledge_retention/knowledge_retention.py +60 -97
  52. deepeval-3.7.6/deepeval/metrics/knowledge_retention/schema.py +21 -0
  53. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/knowledge_retention/template.py +12 -0
  54. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/mcp/mcp_task_completion.py +68 -38
  55. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +92 -74
  56. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/mcp/template.py +52 -0
  57. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +58 -64
  58. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/mcp_use_metric/template.py +12 -0
  59. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/misuse/misuse.py +77 -97
  60. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/misuse/template.py +15 -0
  61. deepeval-3.7.6/deepeval/metrics/multimodal_metrics/__init__.py +5 -0
  62. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +59 -53
  63. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +79 -95
  64. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +59 -53
  65. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +59 -53
  66. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +111 -109
  67. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/non_advice/non_advice.py +79 -105
  68. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/non_advice/template.py +12 -0
  69. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/pattern_match/pattern_match.py +12 -4
  70. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/pii_leakage/pii_leakage.py +75 -106
  71. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/pii_leakage/template.py +14 -0
  72. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/plan_adherence/plan_adherence.py +63 -89
  73. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/plan_adherence/template.py +11 -0
  74. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/plan_quality/plan_quality.py +63 -87
  75. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/plan_quality/template.py +9 -0
  76. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/prompt_alignment/prompt_alignment.py +72 -83
  77. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/prompt_alignment/template.py +12 -0
  78. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/ragas.py +3 -3
  79. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/role_adherence/role_adherence.py +48 -71
  80. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/role_adherence/template.py +14 -0
  81. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/role_violation/role_violation.py +75 -108
  82. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/role_violation/template.py +12 -0
  83. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/step_efficiency/step_efficiency.py +55 -65
  84. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/step_efficiency/template.py +11 -0
  85. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/summarization/summarization.py +115 -183
  86. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/summarization/template.py +19 -0
  87. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/task_completion/task_completion.py +67 -73
  88. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/tool_correctness/tool_correctness.py +45 -44
  89. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/tool_use/tool_use.py +42 -66
  90. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/topic_adherence/template.py +13 -0
  91. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/topic_adherence/topic_adherence.py +53 -67
  92. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/toxicity/template.py +13 -0
  93. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/toxicity/toxicity.py +80 -99
  94. deepeval-3.7.6/deepeval/metrics/turn_contextual_precision/schema.py +21 -0
  95. deepeval-3.7.6/deepeval/metrics/turn_contextual_precision/template.py +187 -0
  96. deepeval-3.7.6/deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +592 -0
  97. deepeval-3.7.6/deepeval/metrics/turn_contextual_recall/schema.py +21 -0
  98. deepeval-3.7.6/deepeval/metrics/turn_contextual_recall/template.py +178 -0
  99. deepeval-3.7.6/deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +563 -0
  100. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy → deepeval-3.7.6/deepeval/metrics/turn_contextual_relevancy}/schema.py +7 -1
  101. deepeval-3.7.6/deepeval/metrics/turn_contextual_relevancy/template.py +161 -0
  102. deepeval-3.7.6/deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +576 -0
  103. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness → deepeval-3.7.6/deepeval/metrics/turn_faithfulness}/schema.py +11 -3
  104. deepeval-3.7.6/deepeval/metrics/turn_faithfulness/template.py +218 -0
  105. deepeval-3.7.6/deepeval/metrics/turn_faithfulness/turn_faithfulness.py +627 -0
  106. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/turn_relevancy/template.py +14 -0
  107. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/turn_relevancy/turn_relevancy.py +56 -69
  108. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/utils.py +158 -122
  109. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/__init__.py +0 -12
  110. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/base_model.py +49 -33
  111. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/embedding_models/__init__.py +7 -0
  112. deepeval-3.7.6/deepeval/models/embedding_models/azure_embedding_model.py +166 -0
  113. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/embedding_models/local_embedding_model.py +39 -20
  114. deepeval-3.7.6/deepeval/models/embedding_models/ollama_embedding_model.py +113 -0
  115. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/embedding_models/openai_embedding_model.py +42 -22
  116. deepeval-3.7.6/deepeval/models/llms/amazon_bedrock_model.py +315 -0
  117. deepeval-3.7.6/deepeval/models/llms/anthropic_model.py +297 -0
  118. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/llms/azure_model.py +218 -60
  119. deepeval-3.7.6/deepeval/models/llms/constants.py +2032 -0
  120. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/llms/deepseek_model.py +95 -40
  121. deepeval-3.7.6/deepeval/models/llms/gemini_model.py +432 -0
  122. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/llms/grok_model.py +139 -68
  123. deepeval-3.7.6/deepeval/models/llms/kimi_model.py +297 -0
  124. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/llms/litellm_model.py +131 -37
  125. deepeval-3.7.6/deepeval/models/llms/local_model.py +242 -0
  126. deepeval-3.7.6/deepeval/models/llms/ollama_model.py +237 -0
  127. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/llms/openai_model.py +222 -269
  128. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/llms/portkey_model.py +81 -22
  129. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/llms/utils.py +8 -3
  130. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/retry_policy.py +17 -14
  131. deepeval-3.7.6/deepeval/models/utils.py +177 -0
  132. deepeval-3.7.6/deepeval/optimizer/__init__.py +5 -0
  133. deepeval-3.7.6/deepeval/optimizer/algorithms/__init__.py +6 -0
  134. deepeval-3.7.6/deepeval/optimizer/algorithms/base.py +29 -0
  135. deepeval-3.7.6/deepeval/optimizer/algorithms/configs.py +18 -0
  136. deepeval-3.7.6/deepeval/optimizer/algorithms/copro/__init__.py +5 -0
  137. deepeval-3.7.4/deepeval/optimization/copro/loop.py → deepeval-3.7.6/deepeval/optimizer/algorithms/copro/copro.py +112 -113
  138. deepeval-3.7.6/deepeval/optimizer/algorithms/gepa/__init__.py +5 -0
  139. deepeval-3.7.4/deepeval/optimization/gepa/loop.py → deepeval-3.7.6/deepeval/optimizer/algorithms/gepa/gepa.py +175 -115
  140. deepeval-3.7.6/deepeval/optimizer/algorithms/miprov2/__init__.py +17 -0
  141. deepeval-3.7.6/deepeval/optimizer/algorithms/miprov2/bootstrapper.py +435 -0
  142. deepeval-3.7.6/deepeval/optimizer/algorithms/miprov2/miprov2.py +752 -0
  143. deepeval-3.7.6/deepeval/optimizer/algorithms/miprov2/proposer.py +301 -0
  144. deepeval-3.7.6/deepeval/optimizer/algorithms/simba/__init__.py +5 -0
  145. deepeval-3.7.4/deepeval/optimization/simba/loop.py → deepeval-3.7.6/deepeval/optimizer/algorithms/simba/simba.py +128 -112
  146. {deepeval-3.7.4/deepeval/optimization → deepeval-3.7.6/deepeval/optimizer}/configs.py +5 -8
  147. deepeval-3.7.4/deepeval/optimization/policies/selection.py → deepeval-3.7.6/deepeval/optimizer/policies.py +63 -2
  148. deepeval-3.7.6/deepeval/optimizer/prompt_optimizer.py +263 -0
  149. deepeval-3.7.6/deepeval/optimizer/rewriter/__init__.py +5 -0
  150. deepeval-3.7.6/deepeval/optimizer/rewriter/rewriter.py +124 -0
  151. deepeval-3.7.6/deepeval/optimizer/rewriter/utils.py +214 -0
  152. deepeval-3.7.6/deepeval/optimizer/scorer/__init__.py +5 -0
  153. deepeval-3.7.6/deepeval/optimizer/scorer/base.py +86 -0
  154. deepeval-3.7.6/deepeval/optimizer/scorer/scorer.py +316 -0
  155. deepeval-3.7.6/deepeval/optimizer/scorer/utils.py +30 -0
  156. deepeval-3.7.6/deepeval/optimizer/types.py +148 -0
  157. {deepeval-3.7.4/deepeval/optimization → deepeval-3.7.6/deepeval/optimizer}/utils.py +47 -165
  158. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/prompt/prompt.py +5 -9
  159. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/simulator/conversation_simulator.py +43 -0
  160. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/simulator/template.py +13 -0
  161. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_case/__init__.py +1 -3
  162. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_case/api.py +26 -45
  163. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_case/arena_test_case.py +7 -2
  164. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_case/conversational_test_case.py +68 -1
  165. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_case/llm_test_case.py +206 -1
  166. deepeval-3.7.6/deepeval/test_case/utils.py +20 -0
  167. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_run/api.py +18 -14
  168. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_run/test_run.py +3 -3
  169. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/patchers.py +9 -4
  170. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/tracing.py +2 -2
  171. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/utils.py +65 -0
  172. {deepeval-3.7.4 → deepeval-3.7.6}/pyproject.toml +4 -4
  173. deepeval-3.7.4/deepeval/_version.py +0 -1
  174. deepeval-3.7.4/deepeval/dataset/api.py +0 -28
  175. deepeval-3.7.4/deepeval/dataset/golden.py +0 -60
  176. deepeval-3.7.4/deepeval/integrations/pydantic_ai/agent.py +0 -21
  177. deepeval-3.7.4/deepeval/metrics/contextual_precision/template.py +0 -84
  178. deepeval-3.7.4/deepeval/metrics/contextual_recall/template.py +0 -75
  179. deepeval-3.7.4/deepeval/metrics/contextual_relevancy/template.py +0 -77
  180. deepeval-3.7.4/deepeval/metrics/faithfulness/template.py +0 -140
  181. deepeval-3.7.4/deepeval/metrics/knowledge_retention/schema.py +0 -15
  182. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/__init__.py +0 -24
  183. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -343
  184. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -19
  185. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -122
  186. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -301
  187. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -15
  188. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -132
  189. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -285
  190. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -15
  191. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -112
  192. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -282
  193. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -102
  194. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -356
  195. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -175
  196. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -386
  197. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -11
  198. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -148
  199. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -68
  200. deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -290
  201. deepeval-3.7.4/deepeval/model_integrations/__init__.py +0 -0
  202. deepeval-3.7.4/deepeval/models/embedding_models/azure_embedding_model.py +0 -120
  203. deepeval-3.7.4/deepeval/models/embedding_models/ollama_embedding_model.py +0 -80
  204. deepeval-3.7.4/deepeval/models/llms/amazon_bedrock_model.py +0 -161
  205. deepeval-3.7.4/deepeval/models/llms/anthropic_model.py +0 -182
  206. deepeval-3.7.4/deepeval/models/llms/gemini_model.py +0 -287
  207. deepeval-3.7.4/deepeval/models/llms/kimi_model.py +0 -247
  208. deepeval-3.7.4/deepeval/models/llms/local_model.py +0 -138
  209. deepeval-3.7.4/deepeval/models/llms/ollama_model.py +0 -114
  210. deepeval-3.7.4/deepeval/models/mlllms/__init__.py +0 -4
  211. deepeval-3.7.4/deepeval/models/mlllms/azure_model.py +0 -343
  212. deepeval-3.7.4/deepeval/models/mlllms/gemini_model.py +0 -313
  213. deepeval-3.7.4/deepeval/models/mlllms/ollama_model.py +0 -175
  214. deepeval-3.7.4/deepeval/models/mlllms/openai_model.py +0 -309
  215. deepeval-3.7.4/deepeval/models/utils.py +0 -76
  216. deepeval-3.7.4/deepeval/optimization/__init__.py +0 -13
  217. deepeval-3.7.4/deepeval/optimization/adapters/__init__.py +0 -2
  218. deepeval-3.7.4/deepeval/optimization/adapters/deepeval_scoring_adapter.py +0 -588
  219. deepeval-3.7.4/deepeval/optimization/aggregates.py +0 -14
  220. deepeval-3.7.4/deepeval/optimization/copro/configs.py +0 -31
  221. deepeval-3.7.4/deepeval/optimization/gepa/__init__.py +0 -7
  222. deepeval-3.7.4/deepeval/optimization/gepa/configs.py +0 -115
  223. deepeval-3.7.4/deepeval/optimization/miprov2/configs.py +0 -134
  224. deepeval-3.7.4/deepeval/optimization/miprov2/loop.py +0 -785
  225. deepeval-3.7.4/deepeval/optimization/mutations/__init__.py +0 -0
  226. deepeval-3.7.4/deepeval/optimization/mutations/prompt_rewriter.py +0 -458
  227. deepeval-3.7.4/deepeval/optimization/policies/__init__.py +0 -16
  228. deepeval-3.7.4/deepeval/optimization/policies/tie_breaker.py +0 -67
  229. deepeval-3.7.4/deepeval/optimization/prompt_optimizer.py +0 -462
  230. deepeval-3.7.4/deepeval/optimization/simba/__init__.py +0 -0
  231. deepeval-3.7.4/deepeval/optimization/simba/configs.py +0 -33
  232. deepeval-3.7.4/deepeval/optimization/types.py +0 -361
  233. deepeval-3.7.4/deepeval/plugins/__init__.py +0 -0
  234. deepeval-3.7.4/deepeval/synthesizer/chunking/__init__.py +0 -0
  235. deepeval-3.7.4/deepeval/test_case/mllm_test_case.py +0 -170
  236. deepeval-3.7.4/deepeval/test_case/utils.py +0 -24
  237. {deepeval-3.7.4 → deepeval-3.7.6}/LICENSE.md +0 -0
  238. {deepeval-3.7.4 → deepeval-3.7.6}/README.md +0 -0
  239. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/__init__.py +0 -0
  240. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/annotation/__init__.py +0 -0
  241. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/annotation/annotation.py +0 -0
  242. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/annotation/api.py +0 -0
  243. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/anthropic/__init__.py +0 -0
  244. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/anthropic/extractors.py +0 -0
  245. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/anthropic/patch.py +0 -0
  246. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/anthropic/utils.py +0 -0
  247. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/__init__.py +0 -0
  248. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/arc/__init__.py +0 -0
  249. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/arc/arc.py +0 -0
  250. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/arc/mode.py +0 -0
  251. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/arc/template.py +0 -0
  252. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/base_benchmark.py +0 -0
  253. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/bbq/__init__.py +0 -0
  254. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/bbq/bbq.py +0 -0
  255. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/bbq/task.py +0 -0
  256. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/bbq/template.py +0 -0
  257. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
  258. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
  259. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
  260. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
  261. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
  262. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
  263. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
  264. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
  265. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
  266. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
  267. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
  268. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
  269. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
  270. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
  271. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
  272. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
  273. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
  274. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
  275. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
  276. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
  277. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
  278. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
  279. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
  280. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
  281. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
  282. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  283. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  284. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  285. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
  286. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
  287. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
  288. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
  289. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
  290. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
  291. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
  292. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
  293. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
  294. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
  295. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
  296. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
  297. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
  298. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
  299. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
  300. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
  301. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
  302. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
  303. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
  304. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
  305. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
  306. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
  307. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
  308. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
  309. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
  310. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  311. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  312. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  313. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
  314. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
  315. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
  316. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
  317. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/bool_q/__init__.py +0 -0
  318. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
  319. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/bool_q/template.py +0 -0
  320. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/drop/__init__.py +0 -0
  321. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/drop/drop.py +0 -0
  322. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/drop/task.py +0 -0
  323. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/drop/template.py +0 -0
  324. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
  325. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
  326. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
  327. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
  328. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
  329. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
  330. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/gsm8k/template.py +0 -0
  331. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
  332. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
  333. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/hellaswag/task.py +0 -0
  334. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/hellaswag/template.py +0 -0
  335. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/human_eval/__init__.py +0 -0
  336. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
  337. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/human_eval/task.py +0 -0
  338. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/human_eval/template.py +0 -0
  339. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/ifeval/__init__.py +0 -0
  340. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
  341. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/ifeval/template.py +0 -0
  342. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/lambada/__init__.py +0 -0
  343. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/lambada/lambada.py +0 -0
  344. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/lambada/template.py +0 -0
  345. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
  346. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
  347. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/logi_qa/task.py +0 -0
  348. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/logi_qa/template.py +0 -0
  349. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/math_qa/__init__.py +0 -0
  350. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
  351. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/math_qa/task.py +0 -0
  352. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/math_qa/template.py +0 -0
  353. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/mmlu/__init__.py +0 -0
  354. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
  355. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/mmlu/task.py +0 -0
  356. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/mmlu/template.py +0 -0
  357. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/modes/__init__.py +0 -0
  358. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/results.py +0 -0
  359. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/schema.py +0 -0
  360. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/squad/__init__.py +0 -0
  361. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/squad/squad.py +0 -0
  362. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/squad/task.py +0 -0
  363. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/squad/template.py +0 -0
  364. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/tasks/__init__.py +0 -0
  365. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
  366. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
  367. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/truthful_qa/task.py +0 -0
  368. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/truthful_qa/template.py +0 -0
  369. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
  370. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/utils.py +0 -0
  371. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/winogrande/__init__.py +0 -0
  372. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/winogrande/template.py +0 -0
  373. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
  374. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/cli/__init__.py +0 -0
  375. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/cli/dotenv_handler.py +0 -0
  376. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/cli/main.py +0 -0
  377. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/cli/server.py +0 -0
  378. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/cli/test.py +0 -0
  379. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/cli/types.py +0 -0
  380. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/cli/utils.py +0 -0
  381. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/confident/__init__.py +0 -0
  382. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/confident/api.py +0 -0
  383. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/confident/types.py +0 -0
  384. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/config/__init__.py +0 -0
  385. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/config/logging.py +0 -0
  386. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/config/settings_manager.py +0 -0
  387. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/config/utils.py +0 -0
  388. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/constants.py +0 -0
  389. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/contextvars.py +0 -0
  390. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/dataset/__init__.py +0 -0
  391. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/dataset/dataset.py +0 -0
  392. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/dataset/test_run_tracer.py +0 -0
  393. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/dataset/types.py +0 -0
  394. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/dataset/utils.py +0 -0
  395. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/errors.py +0 -0
  396. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/evaluate/__init__.py +0 -0
  397. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/evaluate/api.py +0 -0
  398. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/evaluate/compare.py +0 -0
  399. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/evaluate/configs.py +0 -0
  400. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/evaluate/types.py +0 -0
  401. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/__init__.py +0 -0
  402. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/crewai/__init__.py +0 -0
  403. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/crewai/handler.py +0 -0
  404. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/crewai/subs.py +0 -0
  405. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/crewai/tool.py +0 -0
  406. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/crewai/wrapper.py +0 -0
  407. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/hugging_face/__init__.py +0 -0
  408. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/hugging_face/callback.py +0 -0
  409. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
  410. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
  411. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/hugging_face/utils.py +0 -0
  412. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/langchain/__init__.py +0 -0
  413. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/langchain/callback.py +0 -0
  414. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/langchain/patch.py +0 -0
  415. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/langchain/utils.py +0 -0
  416. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/llama_index/__init__.py +0 -0
  417. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/llama_index/handler.py +0 -0
  418. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/llama_index/utils.py +0 -0
  419. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
  420. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/pydantic_ai/otel.py +0 -0
  421. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
  422. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
  423. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/answer_relevancy/schema.py +0 -0
  424. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/api.py +0 -0
  425. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
  426. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/arena_g_eval/schema.py +0 -0
  427. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/arena_g_eval/utils.py +0 -0
  428. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/argument_correctness/__init__.py +0 -0
  429. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/argument_correctness/schema.py +0 -0
  430. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/bias/__init__.py +0 -0
  431. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/bias/schema.py +0 -0
  432. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/contextual_precision/__init__.py +0 -0
  433. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/contextual_precision/schema.py +0 -0
  434. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/contextual_recall/__init__.py +0 -0
  435. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/contextual_recall/schema.py +0 -0
  436. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
  437. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
  438. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
  439. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversation_completeness/schema.py +0 -0
  440. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversational_dag/__init__.py +0 -0
  441. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
  442. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
  443. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/conversational_g_eval/template.py +0 -0
  444. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/dag/__init__.py +0 -0
  445. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/dag/graph.py +0 -0
  446. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/dag/schema.py +0 -0
  447. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/dag/utils.py +0 -0
  448. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/exact_match/__init__.py +0 -0
  449. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/faithfulness/__init__.py +0 -0
  450. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/g_eval/__init__.py +0 -0
  451. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/g_eval/schema.py +0 -0
  452. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
  453. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/goal_accuracy/schema.py +0 -0
  454. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/hallucination/__init__.py +0 -0
  455. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/hallucination/schema.py +0 -0
  456. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/json_correctness/__init__.py +0 -0
  457. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/json_correctness/schema.py +0 -0
  458. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
  459. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/mcp/__init__.py +0 -0
  460. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/mcp/schema.py +0 -0
  461. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
  462. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
  463. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/misuse/__init__.py +0 -0
  464. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/misuse/schema.py +0 -0
  465. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
  466. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
  467. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
  468. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
  469. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
  470. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
  471. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
  472. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
  473. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
  474. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
  475. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
  476. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
  477. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy → deepeval-3.7.6/deepeval/metrics/multimodal_metrics/text_to_image}/__init__.py +0 -0
  478. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
  479. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
  480. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/non_advice/__init__.py +0 -0
  481. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/non_advice/schema.py +0 -0
  482. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision → deepeval-3.7.6/deepeval/metrics/pattern_match}/__init__.py +0 -0
  483. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/pii_leakage/__init__.py +0 -0
  484. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/pii_leakage/schema.py +0 -0
  485. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/plan_adherence/__init__.py +0 -0
  486. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/plan_adherence/schema.py +0 -0
  487. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/plan_quality/__init__.py +0 -0
  488. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/plan_quality/schema.py +0 -0
  489. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall → deepeval-3.7.6/deepeval/metrics/prompt_alignment}/__init__.py +0 -0
  490. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/prompt_alignment/schema.py +0 -0
  491. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy → deepeval-3.7.6/deepeval/metrics/role_adherence}/__init__.py +0 -0
  492. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/role_adherence/schema.py +0 -0
  493. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/role_violation/__init__.py +0 -0
  494. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/role_violation/schema.py +0 -0
  495. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/step_efficiency/__init__.py +0 -0
  496. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/step_efficiency/schema.py +0 -0
  497. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/summarization/__init__.py +0 -0
  498. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/summarization/schema.py +0 -0
  499. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_faithfulness → deepeval-3.7.6/deepeval/metrics/task_completion}/__init__.py +0 -0
  500. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/task_completion/schema.py +0 -0
  501. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/task_completion/template.py +0 -0
  502. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_g_eval → deepeval-3.7.6/deepeval/metrics/tool_correctness}/__init__.py +0 -0
  503. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/tool_correctness/schema.py +0 -0
  504. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/tool_correctness/template.py +0 -0
  505. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/tool_use/__init__.py +0 -0
  506. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/tool_use/schema.py +0 -0
  507. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/tool_use/template.py +0 -0
  508. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/topic_adherence/__init__.py +0 -0
  509. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/topic_adherence/schema.py +0 -0
  510. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/toxicity/__init__.py +0 -0
  511. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/toxicity/schema.py +0 -0
  512. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness → deepeval-3.7.6/deepeval/metrics/turn_contextual_precision}/__init__.py +0 -0
  513. {deepeval-3.7.4/deepeval/metrics/multimodal_metrics/text_to_image → deepeval-3.7.6/deepeval/metrics/turn_contextual_recall}/__init__.py +0 -0
  514. {deepeval-3.7.4/deepeval/metrics/pattern_match → deepeval-3.7.6/deepeval/metrics/turn_contextual_relevancy}/__init__.py +0 -0
  515. {deepeval-3.7.4/deepeval/metrics/prompt_alignment → deepeval-3.7.6/deepeval/metrics/turn_faithfulness}/__init__.py +0 -0
  516. {deepeval-3.7.4/deepeval/metrics/role_adherence → deepeval-3.7.6/deepeval/metrics/turn_relevancy}/__init__.py +0 -0
  517. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/metrics/turn_relevancy/schema.py +0 -0
  518. {deepeval-3.7.4/deepeval/metrics/task_completion → deepeval-3.7.6/deepeval/model_integrations}/__init__.py +0 -0
  519. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/model_integrations/types.py +0 -0
  520. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/model_integrations/utils.py +0 -0
  521. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/_summac_model.py +0 -0
  522. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/answer_relevancy_model.py +0 -0
  523. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/detoxify_model.py +0 -0
  524. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/hallucination_model.py +0 -0
  525. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/llms/__init__.py +0 -0
  526. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/summac_model.py +0 -0
  527. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/models/unbias_model.py +0 -0
  528. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/openai/__init__.py +0 -0
  529. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/openai/extractors.py +0 -0
  530. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/openai/patch.py +0 -0
  531. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/openai/utils.py +0 -0
  532. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/openai_agents/__init__.py +0 -0
  533. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/openai_agents/agent.py +0 -0
  534. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/openai_agents/callback_handler.py +0 -0
  535. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/openai_agents/extractors.py +0 -0
  536. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/openai_agents/patch.py +0 -0
  537. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/openai_agents/runner.py +0 -0
  538. {deepeval-3.7.4/deepeval/optimization → deepeval-3.7.6/deepeval/optimizer/algorithms}/simba/types.py +0 -0
  539. {deepeval-3.7.4/deepeval/metrics/tool_correctness → deepeval-3.7.6/deepeval/plugins}/__init__.py +0 -0
  540. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/plugins/plugin.py +0 -0
  541. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/progress_context.py +0 -0
  542. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/prompt/__init__.py +0 -0
  543. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/prompt/api.py +0 -0
  544. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/prompt/utils.py +0 -0
  545. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/py.typed +0 -0
  546. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/red_teaming/README.md +0 -0
  547. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/scorer/__init__.py +0 -0
  548. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/scorer/scorer.py +0 -0
  549. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/simulator/__init__.py +0 -0
  550. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/simulator/schema.py +0 -0
  551. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/singleton.py +0 -0
  552. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/__init__.py +0 -0
  553. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/base_synthesizer.py +0 -0
  554. {deepeval-3.7.4/deepeval/metrics/turn_relevancy → deepeval-3.7.6/deepeval/synthesizer/chunking}/__init__.py +0 -0
  555. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/chunking/context_generator.py +0 -0
  556. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
  557. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/config.py +0 -0
  558. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/schema.py +0 -0
  559. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/synthesizer.py +0 -0
  560. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/templates/__init__.py +0 -0
  561. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/templates/template.py +0 -0
  562. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/templates/template_extraction.py +0 -0
  563. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/templates/template_prompt.py +0 -0
  564. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/types.py +0 -0
  565. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/synthesizer/utils.py +0 -0
  566. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/telemetry.py +0 -0
  567. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_case/mcp.py +0 -0
  568. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_run/__init__.py +0 -0
  569. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_run/cache.py +0 -0
  570. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_run/hooks.py +0 -0
  571. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/test_run/hyperparameters.py +0 -0
  572. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/__init__.py +0 -0
  573. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/api.py +0 -0
  574. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/context.py +0 -0
  575. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/offline_evals/__init__.py +0 -0
  576. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/offline_evals/api.py +0 -0
  577. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/offline_evals/span.py +0 -0
  578. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/offline_evals/thread.py +0 -0
  579. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/offline_evals/trace.py +0 -0
  580. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/otel/__init__.py +0 -0
  581. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/otel/exporter.py +0 -0
  582. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/otel/test_exporter.py +0 -0
  583. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/otel/utils.py +0 -0
  584. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/perf_epoch_bridge.py +0 -0
  585. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/trace_context.py +0 -0
  586. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/trace_test_manager.py +0 -0
  587. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/types.py +0 -0
  588. {deepeval-3.7.4 → deepeval-3.7.6}/deepeval/tracing/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.7.4
3
+ Version: 3.7.6
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -13,13 +13,10 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Requires-Dist: aiohttp
16
- Requires-Dist: anthropic
17
16
  Requires-Dist: click (>=8.0.0,<8.3.0)
18
- Requires-Dist: google-genai (>=1.9.0,<2.0.0)
19
17
  Requires-Dist: grpcio (>=1.67.1,<2.0.0)
20
18
  Requires-Dist: jinja2
21
19
  Requires-Dist: nest_asyncio
22
- Requires-Dist: ollama
23
20
  Requires-Dist: openai
24
21
  Requires-Dist: opentelemetry-api (>=1.24.0,<2.0.0)
25
22
  Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.24.0,<2.0.0)
@@ -0,0 +1 @@
1
+ __version__: str = "3.7.6"
@@ -27,6 +27,7 @@ from pydantic import (
27
27
  field_validator,
28
28
  model_validator,
29
29
  SecretStr,
30
+ PositiveFloat,
30
31
  )
31
32
  from pydantic_settings import BaseSettings, SettingsConfigDict
32
33
  from typing import Any, Dict, List, Optional, NamedTuple
@@ -317,6 +318,19 @@ class Settings(BaseSettings):
317
318
 
318
319
  # Anthropic
319
320
  ANTHROPIC_API_KEY: Optional[SecretStr] = None
321
+ ANTHROPIC_MODEL_NAME: Optional[str] = None
322
+ ANTHROPIC_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = None
323
+ ANTHROPIC_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = None
324
+
325
+ # AWS
326
+ AWS_ACCESS_KEY_ID: Optional[SecretStr] = None
327
+ AWS_SECRET_ACCESS_KEY: Optional[SecretStr] = None
328
+ # AWS Bedrock
329
+ USE_AWS_BEDROCK_MODEL: Optional[bool] = None
330
+ AWS_BEDROCK_MODEL_NAME: Optional[str] = None
331
+ AWS_BEDROCK_REGION: Optional[str] = None
332
+ AWS_BEDROCK_COST_PER_INPUT_TOKEN: Optional[PositiveFloat] = None
333
+ AWS_BEDROCK_COST_PER_OUTPUT_TOKEN: Optional[PositiveFloat] = None
320
334
  # Azure Open AI
321
335
  AZURE_OPENAI_API_KEY: Optional[SecretStr] = None
322
336
  AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = None
@@ -329,6 +343,8 @@ class Settings(BaseSettings):
329
343
  USE_DEEPSEEK_MODEL: Optional[bool] = None
330
344
  DEEPSEEK_API_KEY: Optional[SecretStr] = None
331
345
  DEEPSEEK_MODEL_NAME: Optional[str] = None
346
+ DEEPSEEK_COST_PER_INPUT_TOKEN: Optional[float] = None
347
+ DEEPSEEK_COST_PER_OUTPUT_TOKEN: Optional[float] = None
332
348
  # Gemini
333
349
  USE_GEMINI_MODEL: Optional[bool] = None
334
350
  GOOGLE_API_KEY: Optional[SecretStr] = None
@@ -336,11 +352,13 @@ class Settings(BaseSettings):
336
352
  GOOGLE_GENAI_USE_VERTEXAI: Optional[bool] = None
337
353
  GOOGLE_CLOUD_PROJECT: Optional[str] = None
338
354
  GOOGLE_CLOUD_LOCATION: Optional[str] = None
339
- GOOGLE_SERVICE_ACCOUNT_KEY: Optional[str] = None
355
+ GOOGLE_SERVICE_ACCOUNT_KEY: Optional[SecretStr] = None
340
356
  # Grok
341
357
  USE_GROK_MODEL: Optional[bool] = None
342
358
  GROK_API_KEY: Optional[SecretStr] = None
343
359
  GROK_MODEL_NAME: Optional[str] = None
360
+ GROK_COST_PER_INPUT_TOKEN: Optional[float] = None
361
+ GROK_COST_PER_OUTPUT_TOKEN: Optional[float] = None
344
362
  # LiteLLM
345
363
  USE_LITELLM: Optional[bool] = None
346
364
  LITELLM_API_KEY: Optional[SecretStr] = None
@@ -362,6 +380,8 @@ class Settings(BaseSettings):
362
380
  USE_MOONSHOT_MODEL: Optional[bool] = None
363
381
  MOONSHOT_API_KEY: Optional[SecretStr] = None
364
382
  MOONSHOT_MODEL_NAME: Optional[str] = None
383
+ MOONSHOT_COST_PER_INPUT_TOKEN: Optional[float] = None
384
+ MOONSHOT_COST_PER_OUTPUT_TOKEN: Optional[float] = None
365
385
  # Ollama
366
386
  OLLAMA_MODEL_NAME: Optional[str] = None
367
387
  # OpenAI
@@ -388,6 +408,7 @@ class Settings(BaseSettings):
388
408
 
389
409
  # Azure OpenAI
390
410
  USE_AZURE_OPENAI_EMBEDDING: Optional[bool] = None
411
+ AZURE_EMBEDDING_MODEL_NAME: Optional[str] = None
391
412
  AZURE_EMBEDDING_DEPLOYMENT_NAME: Optional[str] = None
392
413
  # Local
393
414
  USE_LOCAL_EMBEDDINGS: Optional[bool] = None
@@ -614,6 +635,7 @@ class Settings(BaseSettings):
614
635
  "SKIP_DEEPEVAL_MISSING_PARAMS",
615
636
  "TOKENIZERS_PARALLELISM",
616
637
  "TRANSFORMERS_NO_ADVISORY_WARNINGS",
638
+ "USE_AWS_BEDROCK_MODEL",
617
639
  "USE_OPENAI_MODEL",
618
640
  "USE_AZURE_OPENAI",
619
641
  "USE_LOCAL_MODEL",
@@ -647,6 +669,8 @@ class Settings(BaseSettings):
647
669
  @field_validator(
648
670
  "OPENAI_COST_PER_INPUT_TOKEN",
649
671
  "OPENAI_COST_PER_OUTPUT_TOKEN",
672
+ "AWS_BEDROCK_COST_PER_INPUT_TOKEN",
673
+ "AWS_BEDROCK_COST_PER_OUTPUT_TOKEN",
650
674
  "TEMPERATURE",
651
675
  "CONFIDENT_TRACE_SAMPLE_RATE",
652
676
  "CONFIDENT_METRIC_LOGGING_SAMPLE_RATE",
@@ -717,6 +741,16 @@ class Settings(BaseSettings):
717
741
  return None
718
742
  return s.upper()
719
743
 
744
+ @field_validator("AWS_BEDROCK_REGION", mode="before")
745
+ @classmethod
746
+ def _normalize_lower(cls, v):
747
+ if v is None:
748
+ return None
749
+ s = str(v).strip()
750
+ if not s:
751
+ return None
752
+ return s.lower()
753
+
720
754
  @field_validator("DEEPEVAL_SDK_RETRY_PROVIDERS", mode="before")
721
755
  @classmethod
722
756
  def _coerce_to_list(cls, v):
@@ -0,0 +1,50 @@
1
+ from pydantic import BaseModel, Field, model_validator
2
+ from typing import Optional, List
3
+
4
+ from deepeval.dataset.golden import Golden, ConversationalGolden
5
+
6
+
7
+ class APIDataset(BaseModel):
8
+ finalized: bool
9
+ goldens: Optional[List[Golden]] = Field(None)
10
+ conversational_goldens: Optional[List[ConversationalGolden]] = Field(
11
+ None, alias="conversationalGoldens"
12
+ )
13
+
14
+ @model_validator(mode="after")
15
+ def set_image_mappings_for_goldens(self):
16
+ if self.goldens:
17
+ for golden in self.goldens:
18
+ golden.images_mapping = golden._get_images_mapping()
19
+ if self.conversational_goldens:
20
+ for golden in self.conversational_goldens:
21
+ golden.images_mapping = golden._get_images_mapping()
22
+
23
+ return self
24
+
25
+
26
+ class APIQueueDataset(BaseModel):
27
+ alias: str
28
+ goldens: Optional[List[Golden]] = Field(None)
29
+ conversational_goldens: Optional[List[ConversationalGolden]] = Field(
30
+ None, alias="conversationalGoldens"
31
+ )
32
+
33
+ @model_validator(mode="after")
34
+ def set_image_mappings_for_goldens(self):
35
+ if self.goldens:
36
+ for golden in self.goldens:
37
+ golden.images_mapping = golden._get_images_mapping()
38
+ if self.conversational_goldens:
39
+ for golden in self.conversational_goldens:
40
+ golden.images_mapping = golden._get_images_mapping()
41
+
42
+ return self
43
+
44
+
45
+ class DatasetHttpResponse(BaseModel):
46
+ id: str
47
+ goldens: Optional[List[Golden]] = Field(None, alias="goldens")
48
+ conversational_goldens: Optional[List[ConversationalGolden]] = Field(
49
+ None, alias="conversationalGoldens"
50
+ )
@@ -0,0 +1,197 @@
1
+ import re
2
+ from pydantic import BaseModel, Field, PrivateAttr, model_validator
3
+ from typing import Optional, Dict, List
4
+ from deepeval.test_case import ToolCall, Turn, MLLMImage
5
+ from deepeval.test_case.llm_test_case import _MLLM_IMAGE_REGISTRY
6
+
7
+
8
+ class Golden(BaseModel):
9
+ input: str
10
+ actual_output: Optional[str] = Field(
11
+ default=None, serialization_alias="actualOutput"
12
+ )
13
+ expected_output: Optional[str] = Field(
14
+ default=None, serialization_alias="expectedOutput"
15
+ )
16
+ context: Optional[List[str]] = Field(default=None)
17
+ retrieval_context: Optional[List[str]] = Field(
18
+ default=None, serialization_alias="retrievalContext"
19
+ )
20
+ additional_metadata: Optional[Dict] = Field(
21
+ default=None, serialization_alias="additionalMetadata"
22
+ )
23
+ comments: Optional[str] = Field(default=None)
24
+ tools_called: Optional[List[ToolCall]] = Field(
25
+ default=None, serialization_alias="toolsCalled"
26
+ )
27
+ expected_tools: Optional[List[ToolCall]] = Field(
28
+ default=None, serialization_alias="expectedTools"
29
+ )
30
+ source_file: Optional[str] = Field(
31
+ default=None, serialization_alias="sourceFile"
32
+ )
33
+ name: Optional[str] = Field(default=None)
34
+ custom_column_key_values: Optional[Dict[str, str]] = Field(
35
+ default=None, serialization_alias="customColumnKeyValues"
36
+ )
37
+ multimodal: bool = Field(False, exclude=True)
38
+ images_mapping: Dict[str, MLLMImage] = Field(
39
+ default=None, alias="imagesMapping"
40
+ )
41
+ _dataset_rank: Optional[int] = PrivateAttr(default=None)
42
+ _dataset_alias: Optional[str] = PrivateAttr(default=None)
43
+ _dataset_id: Optional[str] = PrivateAttr(default=None)
44
+
45
+ @model_validator(mode="after")
46
+ def set_is_multimodal(self):
47
+ import re
48
+
49
+ if self.multimodal is True:
50
+ return self
51
+
52
+ pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
53
+ auto_detect = (
54
+ any(
55
+ [
56
+ re.search(pattern, self.input or "") is not None,
57
+ re.search(pattern, self.actual_output or "") is not None,
58
+ ]
59
+ )
60
+ if isinstance(self.input, str)
61
+ else self.multimodal
62
+ )
63
+ if self.retrieval_context is not None:
64
+ auto_detect = auto_detect or any(
65
+ re.search(pattern, context) is not None
66
+ for context in self.retrieval_context
67
+ )
68
+ if self.context is not None:
69
+ auto_detect = auto_detect or any(
70
+ re.search(pattern, context) is not None
71
+ for context in self.context
72
+ )
73
+
74
+ self.multimodal = auto_detect
75
+
76
+ return self
77
+
78
+ def _get_images_mapping(self) -> Dict[str, MLLMImage]:
79
+ pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
80
+ image_ids = set()
81
+
82
+ def extract_ids_from_string(s: Optional[str]) -> None:
83
+ """Helper to extract image IDs from a string."""
84
+ if s is not None and isinstance(s, str):
85
+ matches = re.findall(pattern, s)
86
+ image_ids.update(matches)
87
+
88
+ def extract_ids_from_list(lst: Optional[List[str]]) -> None:
89
+ """Helper to extract image IDs from a list of strings."""
90
+ if lst is not None:
91
+ for item in lst:
92
+ extract_ids_from_string(item)
93
+
94
+ extract_ids_from_string(self.input)
95
+ extract_ids_from_string(self.actual_output)
96
+ extract_ids_from_string(self.expected_output)
97
+ extract_ids_from_list(self.context)
98
+ extract_ids_from_list(self.retrieval_context)
99
+
100
+ images_mapping = {}
101
+ for img_id in image_ids:
102
+ if img_id in _MLLM_IMAGE_REGISTRY:
103
+ images_mapping[img_id] = _MLLM_IMAGE_REGISTRY[img_id]
104
+
105
+ return images_mapping if len(images_mapping) > 0 else None
106
+
107
+
108
class ConversationalGolden(BaseModel):
    """Pydantic model describing a conversational golden.

    Text fields may embed ``[DEEPEVAL:IMAGE:<id>]`` placeholders. After
    validation, ``multimodal`` is switched on automatically when any scanned
    text field contains such a placeholder.
    """

    scenario: str
    expected_outcome: Optional[str] = Field(
        None, serialization_alias="expectedOutcome"
    )
    user_description: Optional[str] = Field(
        None, serialization_alias="userDescription"
    )
    context: Optional[List[str]] = Field(default=None)
    additional_metadata: Optional[Dict] = Field(
        default=None, serialization_alias="additionalMetadata"
    )
    comments: Optional[str] = Field(default=None)
    name: Optional[str] = Field(default=None)
    custom_column_key_values: Optional[Dict[str, str]] = Field(
        default=None, serialization_alias="customColumnKeyValues"
    )
    turns: Optional[List[Turn]] = Field(default=None)
    # Excluded from serialization; derived by ``set_is_multimodal`` unless the
    # caller sets it to True explicitly.
    multimodal: bool = Field(False, exclude=True)
    # Declared Optional because the default is None and
    # ``_get_images_mapping`` returns None when nothing resolves.
    images_mapping: Optional[Dict[str, MLLMImage]] = Field(
        default=None, alias="imagesMapping"
    )
    _dataset_rank: Optional[int] = PrivateAttr(default=None)
    _dataset_alias: Optional[str] = PrivateAttr(default=None)
    _dataset_id: Optional[str] = PrivateAttr(default=None)

    @model_validator(mode="after")
    def set_is_multimodal(self):
        """Set ``multimodal`` to True when any text field references an image.

        An explicit ``multimodal=True`` is left untouched. Otherwise the
        scenario, expected outcome, user description, context entries, and
        each turn's content and retrieval context are scanned for a
        ``[DEEPEVAL:IMAGE:<id>]`` placeholder.

        Returns:
            The validated model instance (``self``).
        """
        import re

        if self.multimodal is True:
            return self

        pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"

        def has_image(text) -> bool:
            """Return True when ``text`` is a string containing a placeholder."""
            return isinstance(text, str) and re.search(pattern, text) is not None

        candidates = [
            self.scenario,
            self.expected_outcome,
            self.user_description,
        ]
        # ``context`` is scanned too, so the flag agrees with the fields that
        # ``_get_images_mapping`` extracts image ids from.
        candidates.extend(self.context or [])
        for turn in self.turns or []:
            candidates.append(turn.content)
            candidates.extend(turn.retrieval_context or [])

        # Only ever flip the flag to True: a turn without images must not
        # reset a match found in an earlier turn's retrieval context.
        if any(has_image(text) for text in candidates):
            self.multimodal = True

        return self

    def _get_images_mapping(self) -> Optional[Dict[str, MLLMImage]]:
        """Resolve the image placeholders referenced by this golden.

        Scans ``scenario``, ``expected_outcome``, ``context``,
        ``user_description`` and every turn's ``content`` and
        ``retrieval_context`` for ``[DEEPEVAL:IMAGE:<id>]`` placeholders and
        looks each captured ``<id>`` up in ``_MLLM_IMAGE_REGISTRY``.

        Returns:
            A dict mapping each image id found in the registry to its
            registry entry, or ``None`` when no referenced id resolves. Ids
            missing from the registry are skipped.
        """
        # Function-scope import: ``set_is_multimodal`` imports ``re`` locally,
        # so this method must not assume a module-level import exists.
        import re

        pattern = r"\[DEEPEVAL:IMAGE:(.*?)\]"
        image_ids = set()

        def extract_ids_from_string(s: Optional[str]) -> None:
            """Add every image id captured from ``s`` to ``image_ids``."""
            # Non-string values (including None) are ignored.
            if isinstance(s, str):
                image_ids.update(re.findall(pattern, s))

        def extract_ids_from_list(lst: Optional[List[str]]) -> None:
            """Add the image ids from every item of ``lst``; None is a no-op."""
            for item in lst or []:
                extract_ids_from_string(item)

        extract_ids_from_string(self.scenario)
        extract_ids_from_string(self.expected_outcome)
        extract_ids_from_list(self.context)
        extract_ids_from_string(self.user_description)
        for turn in self.turns or []:
            extract_ids_from_string(turn.content)
            extract_ids_from_list(turn.retrieval_context)

        images_mapping = {
            img_id: _MLLM_IMAGE_REGISTRY[img_id]
            for img_id in image_ids
            if img_id in _MLLM_IMAGE_REGISTRY
        }

        # An empty mapping is reported as None, matching the original contract.
        return images_mapping or None
@@ -46,7 +46,6 @@ from deepeval.telemetry import capture_evaluation_run
46
46
  from deepeval.metrics import (
47
47
  BaseMetric,
48
48
  BaseConversationalMetric,
49
- BaseMultimodalMetric,
50
49
  )
51
50
  from deepeval.metrics.indicator import (
52
51
  format_metric_description,
@@ -54,7 +53,6 @@ from deepeval.metrics.indicator import (
54
53
  from deepeval.test_case import (
55
54
  LLMTestCase,
56
55
  ConversationalTestCase,
57
- MLLMTestCase,
58
56
  )
59
57
  from deepeval.test_run import (
60
58
  global_test_run_manager,
@@ -71,14 +69,11 @@ from deepeval.evaluate.execute import (
71
69
 
72
70
 
73
71
  def assert_test(
74
- test_case: Optional[
75
- Union[LLMTestCase, ConversationalTestCase, MLLMTestCase]
76
- ] = None,
72
+ test_case: Optional[Union[LLMTestCase, ConversationalTestCase]] = None,
77
73
  metrics: Optional[
78
74
  Union[
79
75
  List[BaseMetric],
80
76
  List[BaseConversationalMetric],
81
- List[BaseMultimodalMetric],
82
77
  ]
83
78
  ] = None,
84
79
  golden: Optional[Golden] = None,
@@ -175,7 +170,7 @@ def assert_test(
175
170
  try:
176
171
  if not metric_data.success:
177
172
  failed_metrics_data.append(metric_data)
178
- except:
173
+ except Exception:
179
174
  failed_metrics_data.append(metric_data)
180
175
 
181
176
  failed_metrics_str = ", ".join(
@@ -188,14 +183,11 @@ def assert_test(
188
183
 
189
184
 
190
185
  def evaluate(
191
- test_cases: Union[
192
- List[LLMTestCase], List[ConversationalTestCase], List[MLLMTestCase]
193
- ],
186
+ test_cases: Union[List[LLMTestCase], List[ConversationalTestCase]],
194
187
  metrics: Optional[
195
188
  Union[
196
189
  List[BaseMetric],
197
190
  List[BaseConversationalMetric],
198
- List[BaseMultimodalMetric],
199
191
  ]
200
192
  ] = None,
201
193
  # Evals on Confident AI
@@ -272,6 +264,19 @@ def evaluate(
272
264
  test_run.hyperparameters = process_hyperparameters(hyperparameters)
273
265
  test_run.prompts = process_prompts(hyperparameters)
274
266
  global_test_run_manager.save_test_run(TEMP_FILE_PATH)
267
+
268
+ # In CLI mode (`deepeval test run`), the CLI owns finalization and will
269
+ # call `wrap_up_test_run()` once after pytest finishes. Finalizing here
270
+ # as well would double finalize the run and consequently result in
271
+ # duplicate uploads / local saves and temp file races, so only
272
+ # do it when we're NOT in CLI mode.
273
+ if get_is_running_deepeval():
274
+ return EvaluationResult(
275
+ test_results=test_results,
276
+ confident_link=None,
277
+ test_run_id=None,
278
+ )
279
+
275
280
  res = global_test_run_manager.wrap_up_test_run(
276
281
  run_duration, display_table=False
277
282
  )