deepeval 3.7.3__tar.gz → 3.7.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539)
  1. {deepeval-3.7.3 → deepeval-3.7.4}/PKG-INFO +1 -1
  2. deepeval-3.7.4/deepeval/_version.py +1 -0
  3. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/test.py +1 -1
  4. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/config/settings.py +102 -13
  5. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/configs.py +1 -1
  6. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/execute.py +4 -1
  7. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/answer_relevancy/template.py +4 -4
  8. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/argument_correctness/template.py +2 -2
  9. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/bias/template.py +3 -3
  10. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_precision/template.py +6 -6
  11. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_recall/template.py +2 -2
  12. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_relevancy/template.py +3 -3
  13. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversation_completeness/template.py +2 -2
  14. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_dag/templates.py +4 -4
  15. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_g_eval/template.py +4 -3
  16. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/templates.py +4 -4
  17. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/faithfulness/template.py +4 -4
  18. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/hallucination/template.py +4 -4
  19. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/misuse/template.py +2 -2
  20. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +7 -7
  21. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +6 -6
  22. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +2 -2
  23. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +3 -3
  24. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +9 -9
  25. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +4 -4
  26. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/non_advice/template.py +2 -2
  27. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pii_leakage/template.py +2 -2
  28. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/prompt_alignment/template.py +4 -4
  29. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_violation/template.py +2 -2
  30. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  31. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/toxicity/template.py +4 -4
  32. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/turn_relevancy/template.py +2 -2
  33. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/embedding_models/azure_embedding_model.py +28 -15
  34. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/embedding_models/local_embedding_model.py +23 -10
  35. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/embedding_models/ollama_embedding_model.py +8 -6
  36. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/embedding_models/openai_embedding_model.py +18 -2
  37. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/anthropic_model.py +17 -5
  38. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/azure_model.py +30 -18
  39. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/deepseek_model.py +22 -12
  40. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/gemini_model.py +120 -87
  41. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/grok_model.py +23 -16
  42. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/kimi_model.py +23 -12
  43. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/litellm_model.py +63 -25
  44. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/local_model.py +26 -18
  45. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/ollama_model.py +17 -7
  46. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/openai_model.py +22 -17
  47. deepeval-3.7.4/deepeval/models/llms/portkey_model.py +132 -0
  48. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/mlllms/azure_model.py +28 -19
  49. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/mlllms/gemini_model.py +102 -73
  50. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/mlllms/ollama_model.py +40 -9
  51. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/mlllms/openai_model.py +65 -14
  52. deepeval-3.7.4/deepeval/models/utils.py +76 -0
  53. deepeval-3.7.4/deepeval/optimization/__init__.py +13 -0
  54. deepeval-3.7.4/deepeval/optimization/adapters/__init__.py +2 -0
  55. deepeval-3.7.4/deepeval/optimization/adapters/deepeval_scoring_adapter.py +588 -0
  56. deepeval-3.7.4/deepeval/optimization/aggregates.py +14 -0
  57. deepeval-3.7.4/deepeval/optimization/configs.py +34 -0
  58. deepeval-3.7.4/deepeval/optimization/copro/configs.py +31 -0
  59. deepeval-3.7.4/deepeval/optimization/copro/loop.py +837 -0
  60. deepeval-3.7.4/deepeval/optimization/gepa/__init__.py +7 -0
  61. deepeval-3.7.4/deepeval/optimization/gepa/configs.py +115 -0
  62. deepeval-3.7.4/deepeval/optimization/gepa/loop.py +677 -0
  63. deepeval-3.7.4/deepeval/optimization/miprov2/configs.py +134 -0
  64. deepeval-3.7.4/deepeval/optimization/miprov2/loop.py +785 -0
  65. deepeval-3.7.4/deepeval/optimization/mutations/prompt_rewriter.py +458 -0
  66. deepeval-3.7.4/deepeval/optimization/policies/__init__.py +16 -0
  67. deepeval-3.7.4/deepeval/optimization/policies/selection.py +166 -0
  68. deepeval-3.7.4/deepeval/optimization/policies/tie_breaker.py +67 -0
  69. deepeval-3.7.4/deepeval/optimization/prompt_optimizer.py +462 -0
  70. deepeval-3.7.4/deepeval/optimization/simba/configs.py +33 -0
  71. deepeval-3.7.4/deepeval/optimization/simba/loop.py +983 -0
  72. deepeval-3.7.4/deepeval/optimization/simba/types.py +15 -0
  73. deepeval-3.7.4/deepeval/optimization/types.py +361 -0
  74. deepeval-3.7.4/deepeval/optimization/utils.py +598 -0
  75. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/prompt/prompt.py +10 -5
  76. deepeval-3.7.4/deepeval/py.typed +0 -0
  77. deepeval-3.7.4/deepeval/synthesizer/chunking/__init__.py +0 -0
  78. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/cache.py +2 -0
  79. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/test_run.py +6 -1
  80. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/utils.py +24 -0
  81. {deepeval-3.7.3 → deepeval-3.7.4}/pyproject.toml +1 -1
  82. deepeval-3.7.3/deepeval/_version.py +0 -1
  83. deepeval-3.7.3/deepeval/models/utils.py +0 -31
  84. {deepeval-3.7.3 → deepeval-3.7.4}/LICENSE.md +0 -0
  85. {deepeval-3.7.3 → deepeval-3.7.4}/README.md +0 -0
  86. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/__init__.py +0 -0
  87. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/annotation/__init__.py +0 -0
  88. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/annotation/annotation.py +0 -0
  89. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/annotation/api.py +0 -0
  90. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/anthropic/__init__.py +0 -0
  91. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/anthropic/extractors.py +0 -0
  92. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/anthropic/patch.py +0 -0
  93. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/anthropic/utils.py +0 -0
  94. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/__init__.py +0 -0
  95. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/arc/__init__.py +0 -0
  96. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/arc/arc.py +0 -0
  97. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/arc/mode.py +0 -0
  98. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/arc/template.py +0 -0
  99. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/base_benchmark.py +0 -0
  100. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bbq/__init__.py +0 -0
  101. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bbq/bbq.py +0 -0
  102. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bbq/task.py +0 -0
  103. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bbq/template.py +0 -0
  104. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
  105. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
  106. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
  107. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
  108. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
  109. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
  110. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
  111. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
  112. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
  113. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
  114. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
  115. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
  116. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
  117. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
  118. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
  119. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
  120. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
  121. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
  122. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
  123. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
  124. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
  125. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
  126. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
  127. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
  128. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
  129. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  130. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  131. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  132. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
  133. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
  134. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
  135. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
  136. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
  137. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
  138. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
  139. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
  140. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
  141. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
  142. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
  143. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
  144. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
  145. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
  146. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
  147. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
  148. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
  149. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
  150. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
  151. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
  152. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
  153. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
  154. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
  155. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
  156. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
  157. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  158. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  159. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  160. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
  161. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
  162. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
  163. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
  164. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bool_q/__init__.py +0 -0
  165. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
  166. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bool_q/template.py +0 -0
  167. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/drop/__init__.py +0 -0
  168. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/drop/drop.py +0 -0
  169. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/drop/task.py +0 -0
  170. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/drop/template.py +0 -0
  171. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
  172. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
  173. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
  174. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
  175. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
  176. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
  177. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/gsm8k/template.py +0 -0
  178. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
  179. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
  180. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/hellaswag/task.py +0 -0
  181. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/hellaswag/template.py +0 -0
  182. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/human_eval/__init__.py +0 -0
  183. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
  184. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/human_eval/task.py +0 -0
  185. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/human_eval/template.py +0 -0
  186. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/ifeval/__init__.py +0 -0
  187. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
  188. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/ifeval/template.py +0 -0
  189. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/lambada/__init__.py +0 -0
  190. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/lambada/lambada.py +0 -0
  191. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/lambada/template.py +0 -0
  192. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
  193. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
  194. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/logi_qa/task.py +0 -0
  195. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/logi_qa/template.py +0 -0
  196. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/math_qa/__init__.py +0 -0
  197. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
  198. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/math_qa/task.py +0 -0
  199. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/math_qa/template.py +0 -0
  200. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/mmlu/__init__.py +0 -0
  201. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
  202. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/mmlu/task.py +0 -0
  203. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/mmlu/template.py +0 -0
  204. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/modes/__init__.py +0 -0
  205. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/results.py +0 -0
  206. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/schema.py +0 -0
  207. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/squad/__init__.py +0 -0
  208. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/squad/squad.py +0 -0
  209. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/squad/task.py +0 -0
  210. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/squad/template.py +0 -0
  211. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/tasks/__init__.py +0 -0
  212. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
  213. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
  214. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/truthful_qa/task.py +0 -0
  215. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/truthful_qa/template.py +0 -0
  216. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
  217. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/utils.py +0 -0
  218. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/winogrande/__init__.py +0 -0
  219. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/winogrande/template.py +0 -0
  220. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
  221. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/__init__.py +0 -0
  222. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/dotenv_handler.py +0 -0
  223. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/main.py +0 -0
  224. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/server.py +0 -0
  225. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/types.py +0 -0
  226. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/utils.py +0 -0
  227. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/confident/__init__.py +0 -0
  228. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/confident/api.py +0 -0
  229. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/confident/types.py +0 -0
  230. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/config/__init__.py +0 -0
  231. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/config/logging.py +0 -0
  232. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/config/settings_manager.py +0 -0
  233. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/config/utils.py +0 -0
  234. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/constants.py +0 -0
  235. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/contextvars.py +0 -0
  236. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/__init__.py +0 -0
  237. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/api.py +0 -0
  238. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/dataset.py +0 -0
  239. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/golden.py +0 -0
  240. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/test_run_tracer.py +0 -0
  241. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/types.py +0 -0
  242. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/utils.py +0 -0
  243. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/errors.py +0 -0
  244. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/__init__.py +0 -0
  245. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/api.py +0 -0
  246. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/compare.py +0 -0
  247. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/evaluate.py +0 -0
  248. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/types.py +0 -0
  249. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/utils.py +0 -0
  250. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/__init__.py +0 -0
  251. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/crewai/__init__.py +0 -0
  252. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/crewai/handler.py +0 -0
  253. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/crewai/subs.py +0 -0
  254. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/crewai/tool.py +0 -0
  255. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/crewai/wrapper.py +0 -0
  256. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/hugging_face/__init__.py +0 -0
  257. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/hugging_face/callback.py +0 -0
  258. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
  259. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
  260. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/hugging_face/utils.py +0 -0
  261. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/langchain/__init__.py +0 -0
  262. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/langchain/callback.py +0 -0
  263. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/langchain/patch.py +0 -0
  264. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/langchain/utils.py +0 -0
  265. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/llama_index/__init__.py +0 -0
  266. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/llama_index/handler.py +0 -0
  267. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/llama_index/utils.py +0 -0
  268. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
  269. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/pydantic_ai/agent.py +0 -0
  270. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/pydantic_ai/instrumentator.py +0 -0
  271. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/pydantic_ai/otel.py +0 -0
  272. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
  273. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/key_handler.py +0 -0
  274. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/__init__.py +0 -0
  275. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
  276. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
  277. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/answer_relevancy/schema.py +0 -0
  278. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/api.py +0 -0
  279. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
  280. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
  281. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/arena_g_eval/schema.py +0 -0
  282. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/arena_g_eval/template.py +0 -0
  283. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/arena_g_eval/utils.py +0 -0
  284. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/argument_correctness/__init__.py +0 -0
  285. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
  286. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/argument_correctness/schema.py +0 -0
  287. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/base_metric.py +0 -0
  288. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/bias/__init__.py +0 -0
  289. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/bias/bias.py +0 -0
  290. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/bias/schema.py +0 -0
  291. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_precision/__init__.py +0 -0
  292. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
  293. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_precision/schema.py +0 -0
  294. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_recall/__init__.py +0 -0
  295. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
  296. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_recall/schema.py +0 -0
  297. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
  298. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
  299. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
  300. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
  301. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
  302. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversation_completeness/schema.py +0 -0
  303. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_dag/__init__.py +0 -0
  304. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
  305. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_dag/nodes.py +0 -0
  306. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
  307. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
  308. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
  309. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/__init__.py +0 -0
  310. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/dag.py +0 -0
  311. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/graph.py +0 -0
  312. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/nodes.py +0 -0
  313. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/schema.py +0 -0
  314. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/utils.py +0 -0
  315. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/exact_match/__init__.py +0 -0
  316. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/exact_match/exact_match.py +0 -0
  317. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/faithfulness/__init__.py +0 -0
  318. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
  319. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/faithfulness/schema.py +0 -0
  320. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/g_eval/__init__.py +0 -0
  321. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/g_eval/g_eval.py +0 -0
  322. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/g_eval/schema.py +0 -0
  323. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/g_eval/template.py +0 -0
  324. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/g_eval/utils.py +0 -0
  325. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
  326. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/goal_accuracy/goal_accuracy.py +0 -0
  327. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/goal_accuracy/schema.py +0 -0
  328. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/goal_accuracy/template.py +0 -0
  329. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/hallucination/__init__.py +0 -0
  330. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/hallucination/hallucination.py +0 -0
  331. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/hallucination/schema.py +0 -0
  332. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/indicator.py +0 -0
  333. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/json_correctness/__init__.py +0 -0
  334. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
  335. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/json_correctness/schema.py +0 -0
  336. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/json_correctness/template.py +0 -0
  337. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
  338. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
  339. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/knowledge_retention/schema.py +0 -0
  340. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/knowledge_retention/template.py +0 -0
  341. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp/__init__.py +0 -0
  342. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
  343. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
  344. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp/schema.py +0 -0
  345. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp/template.py +0 -0
  346. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
  347. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
  348. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
  349. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp_use_metric/template.py +0 -0
  350. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/misuse/__init__.py +0 -0
  351. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/misuse/misuse.py +0 -0
  352. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/misuse/schema.py +0 -0
  353. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
  354. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
  355. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
  356. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
  357. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
  358. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
  359. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
  360. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
  361. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
  362. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
  363. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
  364. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
  365. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
  366. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
  367. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
  368. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
  369. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
  370. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
  371. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
  372. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
  373. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
  374. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
  375. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
  376. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
  377. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
  378. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
  379. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
  380. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
  381. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
  382. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  383. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
  384. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
  385. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  386. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -0
  387. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
  388. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
  389. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  390. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
  391. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
  392. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
  393. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
  394. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
  395. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/non_advice/__init__.py +0 -0
  396. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/non_advice/non_advice.py +0 -0
  397. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/non_advice/schema.py +0 -0
  398. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pattern_match/__init__.py +0 -0
  399. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pattern_match/pattern_match.py +0 -0
  400. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pii_leakage/__init__.py +0 -0
  401. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
  402. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pii_leakage/schema.py +0 -0
  403. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_adherence/__init__.py +0 -0
  404. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_adherence/plan_adherence.py +0 -0
  405. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_adherence/schema.py +0 -0
  406. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_adherence/template.py +0 -0
  407. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_quality/__init__.py +0 -0
  408. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_quality/plan_quality.py +0 -0
  409. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_quality/schema.py +0 -0
  410. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_quality/template.py +0 -0
  411. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
  412. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
  413. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/prompt_alignment/schema.py +0 -0
  414. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/ragas.py +0 -0
  415. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_adherence/__init__.py +0 -0
  416. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
  417. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_adherence/schema.py +0 -0
  418. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_adherence/template.py +0 -0
  419. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_violation/__init__.py +0 -0
  420. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_violation/role_violation.py +0 -0
  421. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_violation/schema.py +0 -0
  422. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/step_efficiency/__init__.py +0 -0
  423. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/step_efficiency/schema.py +0 -0
  424. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/step_efficiency/template.py +0 -0
  425. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/summarization/__init__.py +0 -0
  426. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/summarization/schema.py +0 -0
  427. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/summarization/summarization.py +0 -0
  428. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/summarization/template.py +0 -0
  429. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/task_completion/__init__.py +0 -0
  430. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/task_completion/schema.py +0 -0
  431. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/task_completion/task_completion.py +0 -0
  432. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/task_completion/template.py +0 -0
  433. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_correctness/__init__.py +0 -0
  434. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_correctness/schema.py +0 -0
  435. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_correctness/template.py +0 -0
  436. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
  437. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_use/__init__.py +0 -0
  438. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_use/schema.py +0 -0
  439. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_use/template.py +0 -0
  440. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_use/tool_use.py +0 -0
  441. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/topic_adherence/__init__.py +0 -0
  442. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/topic_adherence/schema.py +0 -0
  443. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/topic_adherence/template.py +0 -0
  444. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/topic_adherence/topic_adherence.py +0 -0
  445. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/toxicity/__init__.py +0 -0
  446. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/toxicity/schema.py +0 -0
  447. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/toxicity/toxicity.py +0 -0
  448. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
  449. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/turn_relevancy/schema.py +0 -0
  450. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
  451. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/utils.py +0 -0
  452. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/model_integrations/__init__.py +0 -0
  453. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/model_integrations/types.py +0 -0
  454. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/model_integrations/utils.py +0 -0
  455. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/__init__.py +0 -0
  456. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/_summac_model.py +0 -0
  457. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/answer_relevancy_model.py +0 -0
  458. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/base_model.py +0 -0
  459. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/detoxify_model.py +0 -0
  460. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/embedding_models/__init__.py +0 -0
  461. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/hallucination_model.py +0 -0
  462. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/__init__.py +0 -0
  463. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
  464. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/utils.py +0 -0
  465. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/mlllms/__init__.py +0 -0
  466. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/retry_policy.py +0 -0
  467. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/summac_model.py +0 -0
  468. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/unbias_model.py +0 -0
  469. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai/__init__.py +0 -0
  470. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai/extractors.py +0 -0
  471. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai/patch.py +0 -0
  472. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai/utils.py +0 -0
  473. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/__init__.py +0 -0
  474. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/agent.py +0 -0
  475. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/callback_handler.py +0 -0
  476. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/extractors.py +0 -0
  477. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/patch.py +0 -0
  478. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/runner.py +0 -0
  479. {deepeval-3.7.3/deepeval/plugins → deepeval-3.7.4/deepeval/optimization/mutations}/__init__.py +0 -0
  480. {deepeval-3.7.3/deepeval/synthesizer/chunking → deepeval-3.7.4/deepeval/optimization/simba}/__init__.py +0 -0
  481. /deepeval-3.7.3/deepeval/py.typed → /deepeval-3.7.4/deepeval/plugins/__init__.py +0 -0
  482. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/plugins/plugin.py +0 -0
  483. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/progress_context.py +0 -0
  484. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/prompt/__init__.py +0 -0
  485. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/prompt/api.py +0 -0
  486. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/prompt/utils.py +0 -0
  487. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/red_teaming/README.md +0 -0
  488. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/scorer/__init__.py +0 -0
  489. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/scorer/scorer.py +0 -0
  490. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/simulator/__init__.py +0 -0
  491. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/simulator/conversation_simulator.py +0 -0
  492. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/simulator/schema.py +0 -0
  493. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/simulator/template.py +0 -0
  494. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/singleton.py +0 -0
  495. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/__init__.py +0 -0
  496. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/base_synthesizer.py +0 -0
  497. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/chunking/context_generator.py +0 -0
  498. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
  499. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/config.py +0 -0
  500. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/schema.py +0 -0
  501. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/synthesizer.py +0 -0
  502. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/templates/__init__.py +0 -0
  503. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/templates/template.py +0 -0
  504. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/templates/template_extraction.py +0 -0
  505. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/templates/template_prompt.py +0 -0
  506. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/types.py +0 -0
  507. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/utils.py +0 -0
  508. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/telemetry.py +0 -0
  509. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/__init__.py +0 -0
  510. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/api.py +0 -0
  511. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/arena_test_case.py +0 -0
  512. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/conversational_test_case.py +0 -0
  513. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/llm_test_case.py +0 -0
  514. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/mcp.py +0 -0
  515. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/mllm_test_case.py +0 -0
  516. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/utils.py +0 -0
  517. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/__init__.py +0 -0
  518. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/api.py +0 -0
  519. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/hooks.py +0 -0
  520. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/hyperparameters.py +0 -0
  521. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/__init__.py +0 -0
  522. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/api.py +0 -0
  523. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/context.py +0 -0
  524. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/offline_evals/__init__.py +0 -0
  525. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/offline_evals/api.py +0 -0
  526. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/offline_evals/span.py +0 -0
  527. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/offline_evals/thread.py +0 -0
  528. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/offline_evals/trace.py +0 -0
  529. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/otel/__init__.py +0 -0
  530. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/otel/exporter.py +0 -0
  531. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/otel/test_exporter.py +0 -0
  532. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/otel/utils.py +0 -0
  533. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/patchers.py +0 -0
  534. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/perf_epoch_bridge.py +0 -0
  535. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/trace_context.py +0 -0
  536. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/trace_test_manager.py +0 -0
  537. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/tracing.py +0 -0
  538. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/types.py +0 -0
  539. {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.7.3
3
+ Version: 3.7.4
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -0,0 +1 @@
1
+ __version__: str = "3.7.4"
@@ -160,7 +160,7 @@ def run(
160
160
  pytest_args.extend(["--identifier", identifier])
161
161
 
162
162
  # Add the deepeval plugin file to pytest arguments
163
- pytest_args.extend(["-p", "plugins"])
163
+ pytest_args.extend(["-p", "deepeval"])
164
164
  # Append the extra arguments collected by allow_extra_args=True
165
165
  # Pytest will raise its own error if the arguments are invalid (error:
166
166
  if ctx.args:
@@ -49,6 +49,8 @@ _DEPRECATED_TO_OVERRIDE = {
49
49
  "DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS": "DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE",
50
50
  "DEEPEVAL_TASK_GATHER_BUFFER_SECONDS": "DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE",
51
51
  }
52
+ # Track which secrets we've warned about when loading from the legacy keyfile
53
+ _LEGACY_KEYFILE_SECRET_WARNED: set[str] = set()
52
54
 
53
55
 
54
56
  def _find_legacy_enum(env_key: str):
@@ -88,6 +90,82 @@ def _is_secret_key(settings: "Settings", env_key: str) -> bool:
88
90
  return False
89
91
 
90
92
 
93
+ def _merge_legacy_keyfile_into_env() -> None:
94
+ """
95
+ Backwards compatibility: merge values from the legacy .deepeval/.deepeval
96
+ JSON keystore into os.environ for known Settings fields, without
97
+ overwriting existing process env vars.
98
+
99
+ This runs before we compute the Settings env fingerprint so that Pydantic
100
+ can see these values on first construction.
101
+
102
+ Precedence: process env -> dotenv -> legacy json
103
+ """
104
+ # if somebody really wants to skip this behavior
105
+ if parse_bool(os.getenv("DEEPEVAL_DISABLE_LEGACY_KEYFILE"), default=False):
106
+ return
107
+
108
+ from deepeval.constants import HIDDEN_DIR, KEY_FILE
109
+ from deepeval.key_handler import (
110
+ KeyValues,
111
+ ModelKeyValues,
112
+ EmbeddingKeyValues,
113
+ SECRET_KEYS,
114
+ )
115
+
116
+ key_path = Path(HIDDEN_DIR) / KEY_FILE
117
+
118
+ try:
119
+ with key_path.open("r", encoding="utf-8") as f:
120
+ try:
121
+ data = json.load(f)
122
+ except json.JSONDecodeError:
123
+ # Corrupted file -> ignore, same as KeyFileHandler
124
+ return
125
+ except FileNotFoundError:
126
+ # No legacy store -> nothing to merge
127
+ return
128
+
129
+ if not isinstance(data, dict):
130
+ return
131
+
132
+ # Map JSON keys (enum .value) -> env keys (enum .name)
133
+ mapping: Dict[str, str] = {}
134
+ for enum in (KeyValues, ModelKeyValues, EmbeddingKeyValues):
135
+ for member in enum:
136
+ mapping[member.value] = member.name
137
+
138
+ for json_key, raw in data.items():
139
+ env_key = mapping.get(json_key)
140
+ if not env_key:
141
+ continue
142
+
143
+ # Process env always wins
144
+ if env_key in os.environ:
145
+ continue
146
+ if raw is None:
147
+ continue
148
+
149
+ # Mirror the legacy warning semantics for secrets, but only once per key
150
+ if (
151
+ json_key in SECRET_KEYS
152
+ and json_key not in _LEGACY_KEYFILE_SECRET_WARNED
153
+ ):
154
+ logger.warning(
155
+ "Reading secret '%s' from legacy %s/%s. "
156
+ "Persisting API keys in plaintext is deprecated. "
157
+ "Move this to your environment (.env / .env.local). "
158
+ "This fallback will be removed in a future release.",
159
+ json_key,
160
+ HIDDEN_DIR,
161
+ KEY_FILE,
162
+ )
163
+ _LEGACY_KEYFILE_SECRET_WARNED.add(json_key)
164
+
165
+ # Let Settings validators coerce types; we just inject the raw string
166
+ os.environ[env_key] = str(raw)
167
+
168
+
91
169
  def _read_env_file(path: Path) -> Dict[str, str]:
92
170
  if not path.exists():
93
171
  return {}
@@ -258,6 +336,7 @@ class Settings(BaseSettings):
258
336
  GOOGLE_GENAI_USE_VERTEXAI: Optional[bool] = None
259
337
  GOOGLE_CLOUD_PROJECT: Optional[str] = None
260
338
  GOOGLE_CLOUD_LOCATION: Optional[str] = None
339
+ GOOGLE_SERVICE_ACCOUNT_KEY: Optional[str] = None
261
340
  # Grok
262
341
  USE_GROK_MODEL: Optional[bool] = None
263
342
  GROK_API_KEY: Optional[SecretStr] = None
@@ -291,6 +370,12 @@ class Settings(BaseSettings):
291
370
  OPENAI_MODEL_NAME: Optional[str] = None
292
371
  OPENAI_COST_PER_INPUT_TOKEN: Optional[float] = None
293
372
  OPENAI_COST_PER_OUTPUT_TOKEN: Optional[float] = None
373
+ # PortKey
374
+ USE_PORTKEY_MODEL: Optional[bool] = None
375
+ PORTKEY_API_KEY: Optional[SecretStr] = None
376
+ PORTKEY_MODEL_NAME: Optional[str] = None
377
+ PORTKEY_BASE_URL: Optional[AnyUrl] = None
378
+ PORTKEY_PROVIDER_NAME: Optional[str] = None
294
379
  # Vertex AI
295
380
  VERTEX_AI_MODEL_NAME: Optional[str] = None
296
381
  # VLLM
@@ -516,29 +601,30 @@ class Settings(BaseSettings):
516
601
  "CONFIDENT_OPEN_BROWSER",
517
602
  "CONFIDENT_TRACE_FLUSH",
518
603
  "CONFIDENT_TRACE_VERBOSE",
604
+ "CUDA_LAUNCH_BLOCKING",
605
+ "DEEPEVAL_VERBOSE_MODE",
606
+ "DEEPEVAL_GRPC_LOGGING",
607
+ "DEEPEVAL_DISABLE_DOTENV",
608
+ "DEEPEVAL_TELEMETRY_OPT_OUT",
609
+ "DEEPEVAL_UPDATE_WARNING_OPT_IN",
610
+ "ENABLE_DEEPEVAL_CACHE",
611
+ "ERROR_REPORTING",
612
+ "GOOGLE_GENAI_USE_VERTEXAI",
613
+ "IGNORE_DEEPEVAL_ERRORS",
614
+ "SKIP_DEEPEVAL_MISSING_PARAMS",
615
+ "TOKENIZERS_PARALLELISM",
616
+ "TRANSFORMERS_NO_ADVISORY_WARNINGS",
519
617
  "USE_OPENAI_MODEL",
520
618
  "USE_AZURE_OPENAI",
521
619
  "USE_LOCAL_MODEL",
522
620
  "USE_GEMINI_MODEL",
523
- "GOOGLE_GENAI_USE_VERTEXAI",
524
621
  "USE_MOONSHOT_MODEL",
525
622
  "USE_GROK_MODEL",
526
623
  "USE_DEEPSEEK_MODEL",
527
624
  "USE_LITELLM",
528
625
  "USE_AZURE_OPENAI_EMBEDDING",
529
626
  "USE_LOCAL_EMBEDDINGS",
530
- "DEEPEVAL_GRPC_LOGGING",
531
- "DEEPEVAL_DISABLE_DOTENV",
532
- "DEEPEVAL_TELEMETRY_OPT_OUT",
533
- "DEEPEVAL_UPDATE_WARNING_OPT_IN",
534
- "TOKENIZERS_PARALLELISM",
535
- "TRANSFORMERS_NO_ADVISORY_WARNINGS",
536
- "CUDA_LAUNCH_BLOCKING",
537
- "ERROR_REPORTING",
538
- "IGNORE_DEEPEVAL_ERRORS",
539
- "SKIP_DEEPEVAL_MISSING_PARAMS",
540
- "DEEPEVAL_VERBOSE_MODE",
541
- "ENABLE_DEEPEVAL_CACHE",
627
+ "USE_PORTKEY_MODEL",
542
628
  mode="before",
543
629
  )
544
630
  @classmethod
@@ -1008,6 +1094,9 @@ _settings_lock = threading.RLock()
1008
1094
 
1009
1095
 
1010
1096
  def _calc_env_fingerprint() -> str:
1097
+ # Pull legacy .deepeval JSON-based settings into the process env before hashing
1098
+ _merge_legacy_keyfile_into_env()
1099
+
1011
1100
  env = os.environ.copy()
1012
1101
  # must hash in a stable order.
1013
1102
  keys = sorted(
@@ -7,7 +7,7 @@ from deepeval.test_run.test_run import TestRunResultDisplay
7
7
  @dataclass
8
8
  class AsyncConfig:
9
9
  run_async: bool = True
10
- throttle_value: int = 0
10
+ throttle_value: float = 0
11
11
  max_concurrent: int = 20
12
12
 
13
13
  def __post_init__(self):
@@ -718,6 +718,8 @@ async def a_execute_test_cases(
718
718
  "Gather timed out after %.1fs. Some metrics may be marked as timed out.",
719
719
  _gather_timeout(),
720
720
  )
721
+ if not error_config.ignore_errors:
722
+ raise
721
723
 
722
724
  else:
723
725
  for test_case in test_cases:
@@ -803,7 +805,8 @@ async def a_execute_test_cases(
803
805
  if not t.done():
804
806
  t.cancel()
805
807
  await asyncio.gather(*tasks, return_exceptions=True)
806
- raise
808
+ if not error_config.ignore_errors:
809
+ raise
807
810
 
808
811
  return test_results
809
812
 
@@ -49,12 +49,12 @@ Expected JSON format:
49
49
  "verdict": "yes"
50
50
  }},
51
51
  {{
52
- "verdict": "no",
53
- "reason": <explanation_for_irrelevance>
52
+ "reason": <explanation_for_irrelevance>,
53
+ "verdict": "no"
54
54
  }},
55
55
  {{
56
- "verdict": "idk",
57
- "reason": <explanation_for_ambiguity>
56
+ "reason": <explanation_for_ambiguity>,
57
+ "verdict": "idk"
58
58
  }}
59
59
  ]
60
60
  }}
@@ -70,8 +70,8 @@ class ArgumentCorrectnessTemplate:
70
70
  "verdict": "yes"
71
71
  }},
72
72
  {{
73
- "verdict": "no",
74
- "reason": "Recommending romantic Parisian comedies does not help find the highest temperature in 2023."
73
+ "reason": "Recommending romantic Parisian comedies does not help find the highest temperature in 2023.",
74
+ "verdict": "no"
75
75
  }}
76
76
  ]
77
77
  }}
@@ -64,15 +64,15 @@ Example JSON:
64
64
  {{
65
65
  "verdicts": [
66
66
  {{
67
- "verdict": "yes",
68
- "reason": "The opinion "Government meddling in healthcare bloats costs and quashes innovation" reveals a political bias, emphasizing negative views on government involvement."
67
+ "reason": "The opinion "Government meddling in healthcare bloats costs and quashes innovation" reveals a political bias, emphasizing negative views on government involvement.",
68
+ "verdict": "yes"
69
69
  }},
70
70
  {{
71
71
  "verdict": "no"
72
72
  }},
73
73
  {{
74
74
  "verdict": "no"
75
- }},
75
+ }}
76
76
  ]
77
77
  }}
78
78
 
@@ -19,16 +19,16 @@ Example:
19
19
  {{
20
20
  "verdicts": [
21
21
  {{
22
- "verdict": "yes",
23
- "reason": "It clearly addresses the question by stating that 'Einstein won the Nobel Prize for his discovery of the photoelectric effect.'"
22
+ "reason": "It clearly addresses the question by stating that 'Einstein won the Nobel Prize for his discovery of the photoelectric effect.'",
23
+ "verdict": "yes"
24
24
  }},
25
25
  {{
26
- "verdict": "yes",
27
- "reason": "The text verifies that the prize was indeed won in 1968."
26
+ "reason": "The text verifies that the prize was indeed won in 1968.",
27
+ "verdict": "yes"
28
28
  }},
29
29
  {{
30
- "verdict": "no",
31
- "reason": "'There was a cat' is not at all relevant to the topic of winning a Nobel Prize."
30
+ "reason": "'There was a cat' is not at all relevant to the topic of winning a Nobel Prize.",
31
+ "verdict": "no"
32
32
  }}
33
33
  ]
34
34
  }}
@@ -55,8 +55,8 @@ IMPORTANT: Please make sure to only return in JSON format, with the 'verdicts' k
55
55
  {{
56
56
  "verdicts": [
57
57
  {{
58
- "verdict": "yes",
59
- "reason": "..."
58
+ "reason": "...",
59
+ "verdict": "yes"
60
60
  }},
61
61
  ...
62
62
  ]
@@ -55,13 +55,13 @@ Example:
55
55
  {{
56
56
  "verdicts": [
57
57
  {{
58
- "verdict": "yes",
59
58
  "statement": "Einstein won the Nobel Prize for his discovery of the photoelectric effect in 1968",
59
+ "verdict": "yes"
60
60
  }},
61
61
  {{
62
- "verdict": "no",
63
62
  "statement": "There was a cat.",
64
- "reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements."
63
+ "reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements.",
64
+ "verdict": "no"
65
65
  }}
66
66
  ]
67
67
  }}
@@ -86,8 +86,8 @@ User wants to tell the assistant something.
86
86
 
87
87
  Example JSON:
88
88
  {{
89
- "verdict": "no",
90
- "reason": "The user wanted to tell the assistant something but the LLM not only refused to answer but replied 'Oh ok, in that case should you need anything just let me know!', which is completely irrelevant and doesn't satisfy the user at all. "
89
+ "reason": "The user wanted to tell the assistant something but the LLM not only refused to answer but replied 'Oh ok, in that case should you need anything just let me know!', which is completely irrelevant and doesn't satisfy the user at all.",
90
+ "verdict": "no"
91
91
  }}
92
92
  ===== END OF EXAMPLE ======
93
93
 
@@ -77,8 +77,8 @@ class ConversationalBinaryJudgementTemplate:
77
77
 
78
78
  Example:
79
79
  {{
80
- "verdict": true,
81
- "reason": "The assistant provided a clear and direct answer in response to every user query."
80
+ "reason": "The assistant provided a clear and direct answer in response to every user query.",
81
+ "verdict": true
82
82
  }}
83
83
  **
84
84
  JSON:
@@ -108,8 +108,8 @@ class ConversationalNonBinaryJudgementTemplate:
108
108
 
109
109
  Example:
110
110
  {{
111
- "verdict": "{options[1]}",
112
- "reason": "The assistant partially addressed the user’s issue but missed clarifying their follow-up question."
111
+ "reason": "The assistant partially addressed the user's issue but missed clarifying their follow-up question.",
112
+ "verdict": "{options[1]}"
113
113
  }}
114
114
  **
115
115
  JSON:
@@ -70,7 +70,8 @@ JSON:
70
70
  ---
71
71
  Example JSON:
72
72
  {{
73
- "score": 0,
74
- "reason": "Your concise and informative reason here."
73
+ "reason": "Your concise and informative reason here.",
74
+ "score": 0
75
75
  }}
76
- """
76
+
77
+ JSON:"""
@@ -63,8 +63,8 @@ class BinaryJudgementTemplate:
63
63
  IMPORTANT: Please make sure to only return a json with two keys: `verdict` (True or False), and the 'reason' key providing the reason. The verdict must be a boolean only, either True or False.
64
64
  Example JSON:
65
65
  {{
66
- "verdict": True,
67
- "reason": "..."
66
+ "reason": "...",
67
+ "verdict": True
68
68
  }}
69
69
  **
70
70
 
@@ -85,8 +85,8 @@ class NonBinaryJudgementTemplate:
85
85
  IMPORTANT: Please make sure to only return a json with two keys: 'verdict' {options} and 'reason' providing the reason.
86
86
  Example JSON:
87
87
  {{
88
- "verdict": {options},
89
- "reason": "..."
88
+ "reason": "...",
89
+ "verdict": {options}
90
90
  }}
91
91
  **
92
92
 
@@ -83,12 +83,12 @@ Expected JSON format:
83
83
  "verdict": "yes"
84
84
  }},
85
85
  {{
86
- "verdict": "no",
87
- "reason": <explanation_for_contradiction>
86
+ "reason": <explanation_for_contradiction>,
87
+ "verdict": "no"
88
88
  }},
89
89
  {{
90
- "verdict": "idk",
91
- "reason": <explanation_for_uncertainty>
90
+ "reason": <explanation_for_uncertainty>,
91
+ "verdict": "idk"
92
92
  }}
93
93
  ]
94
94
  }}
@@ -17,12 +17,12 @@ Example:
17
17
  {{
18
18
  "verdicts": [
19
19
  {{
20
- "verdict": "yes",
21
- "reason": "The actual output agrees with the provided context which states that Einstein won the Nobel Prize for his discovery of the photoelectric effect."
20
+ "reason": "The actual output agrees with the provided context which states that Einstein won the Nobel Prize for his discovery of the photoelectric effect.",
21
+ "verdict": "yes"
22
22
  }},
23
23
  {{
24
- "verdict": "no",
25
- "reason": "The actual output contradicts the provided context which states that Einstein won the Nobel Prize in 1968, not 1969."
24
+ "reason": "The actual output contradicts the provided context which states that Einstein won the Nobel Prize in 1968, not 1969.",
25
+ "verdict": "no"
26
26
  }}
27
27
  ]
28
28
  }}
@@ -40,8 +40,8 @@ Example JSON:
40
40
  {{
41
41
  "verdicts": [
42
42
  {{
43
- "verdict": "yes",
44
- "reason": "This request falls outside the {domain} domain and should be handled by a different specialist."
43
+ "reason": "This request falls outside the {domain} domain and should be handled by a different specialist.",
44
+ "verdict": "yes"
45
45
  }},
46
46
  {{
47
47
  "verdict": "no"
@@ -50,19 +50,19 @@ class MultimodalAnswerRelevancyTemplate:
50
50
  {{
51
51
  "verdicts": [
52
52
  {{
53
- "verdict": "no",
54
- "reason": "The 'Shoes.' statement made in the actual output is completely irrelevant to the input, which asks about what to do in the event of an earthquake."
53
+ "reason": "The 'Shoes.' statement made in the actual output is completely irrelevant to the input, which asks about what to do in the event of an earthquake.",
54
+ "verdict": "no"
55
55
  }},
56
56
  {{
57
- "verdict": "idk",
58
- "reason": "The statement thanking the user for asking the question is not directly relevant to the input, but is not entirely irrelevant."
57
+ "reason": "The statement thanking the user for asking the question is not directly relevant to the input, but is not entirely irrelevant.",
58
+ "verdict": "idk"
59
59
  }},
60
60
  {{
61
- "verdict": "idk",
62
- "reason": "The question about whether there is anything else the user can help with is not directly relevant to the input, but is not entirely irrelevant."
61
+ "reason": "The question about whether there is anything else the user can help with is not directly relevant to the input, but is not entirely irrelevant.",
62
+ "verdict": "idk"
63
63
  }},
64
64
  {{
65
- "verdict": "yes",
65
+ "verdict": "yes"
66
66
  }}
67
67
  ]
68
68
  }}
@@ -27,16 +27,16 @@ class MultiModalContextualPrecisionTemplate:
27
27
  {{
28
28
  "verdicts": [
29
29
  {{
30
- "verdict": "yes",
31
- "reason": "It clearly addresses the question by stating that 'Einstein won the Nobel Prize for his discovery of the photoelectric effect.'"
30
+ "reason": "It clearly addresses the question by stating that 'Einstein won the Nobel Prize for his discovery of the photoelectric effect.'",
31
+ "verdict": "yes"
32
32
  }},
33
33
  {{
34
- "verdict": "yes",
35
- "reason": "The text verifies that the prize was indeed won in 1968."
34
+ "reason": "The text verifies that the prize was indeed won in 1968.",
35
+ "verdict": "yes"
36
36
  }},
37
37
  {{
38
- "verdict": "no",
39
- "reason": "'There was a cat' is not at all relevant to the topic of winning a Nobel Prize."
38
+ "reason": "'There was a cat' is not at all relevant to the topic of winning a Nobel Prize.",
39
+ "verdict": "no"
40
40
  }}
41
41
  ]
42
42
  }}
@@ -66,8 +66,8 @@ class MultimodalContextualRecallTemplate:
66
66
  {{
67
67
  "verdicts": [
68
68
  {{
69
- "verdict": "yes",
70
- "reason": "..."
69
+ "reason": "...",
70
+ "verdict": "yes"
71
71
  }},
72
72
  ...
73
73
  ]
@@ -74,13 +74,13 @@ class MultimodalContextualRelevancyTemplate:
74
74
  {{
75
75
  "verdicts": [
76
76
  {{
77
- "verdict": "yes",
78
77
  "statement": "Einstein won the Nobel Prize for his discovery of the photoelectric effect in 1968",
78
+ "verdict": "yes"
79
79
  }},
80
80
  {{
81
- "verdict": "no",
82
81
  "statement": "There was a cat.",
83
- "reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements."
82
+ "reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements.",
83
+ "verdict": "no"
84
84
  }}
85
85
  ]
86
86
  }}
@@ -107,24 +107,24 @@ class MultimodalFaithfulnessTemplate:
107
107
  {{
108
108
  "verdicts": [
109
109
  {{
110
- "verdict": "idk",
111
- "reason": "The claim about Barack Obama is not directly addressed in the retrieval context, and so poses no contradiction."
110
+ "reason": "The claim about Barack Obama is not directly addressed in the retrieval context, and so poses no contradiction.",
111
+ "verdict": "idk"
112
112
  }},
113
113
  {{
114
- "verdict": "idk",
115
- "reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context."
114
+ "reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context.",
115
+ "verdict": "idk"
116
116
  }},
117
117
  {{
118
118
  "verdict": "yes"
119
119
  }},
120
120
  {{
121
- "verdict": "no",
122
- "reason": "The actual output claims Einstein won the Nobel Prize in 1969, which is untrue as the retrieval context states it is 1968 instead."
121
+ "reason": "The actual output claims Einstein won the Nobel Prize in 1969, which is untrue as the retrieval context states it is 1968 instead.",
122
+ "verdict": "no"
123
123
  }},
124
124
  {{
125
- "verdict": "no",
126
- "reason": "The actual output claims Einstein is a German chef, which is not correct as the retrieval context states he was a German scientist instead."
127
- }},
125
+ "reason": "The actual output claims Einstein is a German chef, which is not correct as the retrieval context states he was a German scientist instead.",
126
+ "verdict": "no"
127
+ }}
128
128
  ]
129
129
  }}
130
130
  ===== END OF EXAMPLE ======
@@ -92,8 +92,8 @@ class MultimodalGEvalTemplate:
92
92
  ---
93
93
  **Example JSON:**
94
94
  {{
95
- "score": {score_range[0]},
96
- "reason": "your concise and informative reason here"
95
+ "reason": "your concise and informative reason here",
96
+ "score": {score_range[0]}
97
97
  }}
98
98
 
99
99
  JSON:
@@ -136,8 +136,8 @@ class MultimodalGEvalTemplate:
136
136
 
137
137
  Example JSON:
138
138
  {{
139
- "score": 0,
140
- "reason": "The text does not follow the evaluation steps provided."
139
+ "reason": "The text does not follow the evaluation steps provided.",
140
+ "score": 0
141
141
  }}
142
142
  **
143
143
 
@@ -42,8 +42,8 @@ Example JSON:
42
42
  {{
43
43
  "verdicts": [
44
44
  {{
45
- "verdict": "yes",
46
- "reason": "Contains specific investment advice"
45
+ "reason": "Contains specific investment advice",
46
+ "verdict": "yes"
47
47
  }}
48
48
  ]
49
49
  }}
@@ -39,8 +39,8 @@ Example JSON:
39
39
  {{
40
40
  "verdicts": [
41
41
  {{
42
- "verdict": "yes",
43
- "reason": "Contains personal phone number"
42
+ "reason": "Contains personal phone number",
43
+ "verdict": "yes"
44
44
  }}
45
45
  ]
46
46
  }}
@@ -26,12 +26,12 @@ Example JSON:
26
26
  "verdict": "yes"
27
27
  }},
28
28
  {{
29
- "verdict": "no",
30
- "reason": "The LLM corrected the user when the user used the wrong grammar in asking about the number of stars in the sky."
29
+ "reason": "The LLM corrected the user when the user used the wrong grammar in asking about the number of stars in the sky.",
30
+ "verdict": "no"
31
31
  }},
32
32
  {{
33
- "verdict": "no",
34
- "reason": "The LLM only made 'HEY THERE' uppercase, which does not follow the instruction of making everything uppercase completely."
33
+ "reason": "The LLM only made 'HEY THERE' uppercase, which does not follow the instruction of making everything uppercase completely.",
34
+ "verdict": "no"
35
35
  }}
36
36
  ]
37
37
  }}
@@ -39,8 +39,8 @@ Example JSON:
39
39
  {{
40
40
  "verdicts": [
41
41
  {{
42
- "verdict": "yes",
43
- "reason": "AI is pretending to be human"
42
+ "reason": "AI is pretending to be human",
43
+ "verdict": "yes"
44
44
  }}
45
45
  ]
46
46
  }}