deepeval 3.8.0__tar.gz → 3.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527)
  1. {deepeval-3.8.0 → deepeval-3.8.1}/PKG-INFO +1 -1
  2. deepeval-3.8.1/deepeval/_version.py +1 -0
  3. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/annotation.py +2 -2
  4. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/settings.py +3 -0
  5. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/dataset.py +6 -4
  6. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/callback.py +1 -1
  7. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/contextual_recall.py +25 -6
  8. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/schema.py +6 -0
  9. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +10 -1
  10. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +10 -1
  11. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +10 -1
  12. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/utils.py +1 -1
  13. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/amazon_bedrock_model.py +51 -6
  14. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/azure_model.py +33 -7
  15. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/gemini_model.py +6 -1
  16. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/prompt.py +7 -5
  17. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/llm_test_case.py +1 -0
  18. {deepeval-3.8.0 → deepeval-3.8.1}/pyproject.toml +1 -1
  19. deepeval-3.8.0/deepeval/_version.py +0 -1
  20. {deepeval-3.8.0 → deepeval-3.8.1}/LICENSE.md +0 -0
  21. {deepeval-3.8.0 → deepeval-3.8.1}/README.md +0 -0
  22. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/__init__.py +0 -0
  23. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/__init__.py +0 -0
  24. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/api.py +0 -0
  25. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/__init__.py +0 -0
  26. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/extractors.py +0 -0
  27. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/patch.py +0 -0
  28. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/utils.py +0 -0
  29. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/__init__.py +0 -0
  30. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/__init__.py +0 -0
  31. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/arc.py +0 -0
  32. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/mode.py +0 -0
  33. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/template.py +0 -0
  34. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/base_benchmark.py +0 -0
  35. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/__init__.py +0 -0
  36. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/bbq.py +0 -0
  37. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/task.py +0 -0
  38. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/template.py +0 -0
  39. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
  40. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
  41. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
  42. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
  43. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
  44. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
  45. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
  46. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
  47. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
  48. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
  49. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
  50. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
  51. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
  52. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
  53. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
  54. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
  55. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
  56. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
  57. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
  58. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
  59. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
  60. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
  61. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
  62. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
  63. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
  64. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  65. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  66. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  67. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
  68. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
  69. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
  70. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
  71. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
  72. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
  73. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
  74. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
  75. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
  76. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
  77. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
  78. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
  79. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
  80. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
  81. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
  82. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
  83. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
  84. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
  85. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
  86. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
  87. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
  88. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
  89. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
  90. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
  91. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
  92. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  93. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  94. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  95. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
  96. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
  97. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
  98. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
  99. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/__init__.py +0 -0
  100. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
  101. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/template.py +0 -0
  102. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/__init__.py +0 -0
  103. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/drop.py +0 -0
  104. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/task.py +0 -0
  105. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/template.py +0 -0
  106. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
  107. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
  108. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
  109. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
  110. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
  111. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
  112. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/template.py +0 -0
  113. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
  114. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
  115. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/task.py +0 -0
  116. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/template.py +0 -0
  117. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/__init__.py +0 -0
  118. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
  119. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/task.py +0 -0
  120. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/template.py +0 -0
  121. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/__init__.py +0 -0
  122. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
  123. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/template.py +0 -0
  124. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/lambada/__init__.py +0 -0
  125. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/lambada/lambada.py +0 -0
  126. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/lambada/template.py +0 -0
  127. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
  128. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
  129. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/task.py +0 -0
  130. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/template.py +0 -0
  131. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/__init__.py +0 -0
  132. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
  133. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/task.py +0 -0
  134. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/template.py +0 -0
  135. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/__init__.py +0 -0
  136. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
  137. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/task.py +0 -0
  138. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/template.py +0 -0
  139. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/modes/__init__.py +0 -0
  140. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/results.py +0 -0
  141. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/schema.py +0 -0
  142. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/__init__.py +0 -0
  143. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/squad.py +0 -0
  144. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/task.py +0 -0
  145. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/template.py +0 -0
  146. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/tasks/__init__.py +0 -0
  147. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
  148. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
  149. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/task.py +0 -0
  150. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/template.py +0 -0
  151. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
  152. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/utils.py +0 -0
  153. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/__init__.py +0 -0
  154. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/template.py +0 -0
  155. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
  156. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/__init__.py +0 -0
  157. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/dotenv_handler.py +0 -0
  158. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/main.py +0 -0
  159. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/server.py +0 -0
  160. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/test.py +0 -0
  161. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/types.py +0 -0
  162. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/utils.py +0 -0
  163. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/confident/__init__.py +0 -0
  164. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/confident/api.py +0 -0
  165. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/confident/types.py +0 -0
  166. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/__init__.py +0 -0
  167. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/dotenv_handler.py +0 -0
  168. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/logging.py +0 -0
  169. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/settings_manager.py +0 -0
  170. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/utils.py +0 -0
  171. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/constants.py +0 -0
  172. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/contextvars.py +0 -0
  173. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/__init__.py +0 -0
  174. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/api.py +0 -0
  175. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/golden.py +0 -0
  176. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/test_run_tracer.py +0 -0
  177. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/types.py +0 -0
  178. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/utils.py +0 -0
  179. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/errors.py +0 -0
  180. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/__init__.py +0 -0
  181. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/api.py +0 -0
  182. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/compare.py +0 -0
  183. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/configs.py +0 -0
  184. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/evaluate.py +0 -0
  185. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/execute.py +0 -0
  186. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/types.py +0 -0
  187. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/utils.py +0 -0
  188. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/__init__.py +0 -0
  189. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/__init__.py +0 -0
  190. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/handler.py +0 -0
  191. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/subs.py +0 -0
  192. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/tool.py +0 -0
  193. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/wrapper.py +0 -0
  194. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/__init__.py +0 -0
  195. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/callback.py +0 -0
  196. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
  197. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
  198. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/utils.py +0 -0
  199. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/__init__.py +0 -0
  200. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/patch.py +0 -0
  201. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/utils.py +0 -0
  202. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/llama_index/__init__.py +0 -0
  203. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/llama_index/handler.py +0 -0
  204. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/llama_index/utils.py +0 -0
  205. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
  206. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/agent.py +0 -0
  207. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/instrumentator.py +0 -0
  208. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/otel.py +0 -0
  209. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
  210. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/key_handler.py +0 -0
  211. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/__init__.py +0 -0
  212. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
  213. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
  214. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/schema.py +0 -0
  215. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/template.py +0 -0
  216. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/api.py +0 -0
  217. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
  218. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
  219. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/schema.py +0 -0
  220. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/template.py +0 -0
  221. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/utils.py +0 -0
  222. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/__init__.py +0 -0
  223. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
  224. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/schema.py +0 -0
  225. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/template.py +0 -0
  226. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/base_metric.py +0 -0
  227. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/__init__.py +0 -0
  228. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/bias.py +0 -0
  229. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/schema.py +0 -0
  230. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/template.py +0 -0
  231. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/__init__.py +0 -0
  232. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
  233. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/schema.py +0 -0
  234. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/template.py +0 -0
  235. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/__init__.py +0 -0
  236. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/template.py +0 -0
  237. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
  238. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
  239. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
  240. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/template.py +0 -0
  241. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
  242. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
  243. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/schema.py +0 -0
  244. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/template.py +0 -0
  245. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/__init__.py +0 -0
  246. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
  247. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/nodes.py +0 -0
  248. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/templates.py +0 -0
  249. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
  250. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
  251. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
  252. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/template.py +0 -0
  253. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/__init__.py +0 -0
  254. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/dag.py +0 -0
  255. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/graph.py +0 -0
  256. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/nodes.py +0 -0
  257. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/schema.py +0 -0
  258. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/templates.py +0 -0
  259. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/utils.py +0 -0
  260. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/exact_match/__init__.py +0 -0
  261. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/exact_match/exact_match.py +0 -0
  262. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/__init__.py +0 -0
  263. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
  264. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/schema.py +0 -0
  265. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/template.py +0 -0
  266. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/__init__.py +0 -0
  267. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/g_eval.py +0 -0
  268. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/schema.py +0 -0
  269. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/template.py +0 -0
  270. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/utils.py +0 -0
  271. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
  272. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/goal_accuracy.py +0 -0
  273. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/schema.py +0 -0
  274. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/template.py +0 -0
  275. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/__init__.py +0 -0
  276. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/hallucination.py +0 -0
  277. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/schema.py +0 -0
  278. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/template.py +0 -0
  279. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/indicator.py +0 -0
  280. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/__init__.py +0 -0
  281. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
  282. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/schema.py +0 -0
  283. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/template.py +0 -0
  284. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
  285. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
  286. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/schema.py +0 -0
  287. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/template.py +0 -0
  288. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/__init__.py +0 -0
  289. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
  290. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
  291. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/schema.py +0 -0
  292. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/template.py +0 -0
  293. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
  294. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
  295. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
  296. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/template.py +0 -0
  297. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/__init__.py +0 -0
  298. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/misuse.py +0 -0
  299. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/schema.py +0 -0
  300. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/template.py +0 -0
  301. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
  302. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
  303. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
  304. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
  305. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
  306. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
  307. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
  308. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
  309. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
  310. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
  311. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
  312. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
  313. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
  314. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
  315. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
  316. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
  317. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
  318. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
  319. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/__init__.py +0 -0
  320. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/non_advice.py +0 -0
  321. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/schema.py +0 -0
  322. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/template.py +0 -0
  323. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pattern_match/__init__.py +0 -0
  324. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pattern_match/pattern_match.py +0 -0
  325. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/__init__.py +0 -0
  326. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
  327. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/schema.py +0 -0
  328. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/template.py +0 -0
  329. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/__init__.py +0 -0
  330. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/plan_adherence.py +0 -0
  331. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/schema.py +0 -0
  332. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/template.py +0 -0
  333. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/__init__.py +0 -0
  334. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/plan_quality.py +0 -0
  335. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/schema.py +0 -0
  336. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/template.py +0 -0
  337. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
  338. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
  339. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/schema.py +0 -0
  340. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/template.py +0 -0
  341. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/ragas.py +0 -0
  342. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/__init__.py +0 -0
  343. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
  344. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/schema.py +0 -0
  345. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/template.py +0 -0
  346. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/__init__.py +0 -0
  347. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/role_violation.py +0 -0
  348. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/schema.py +0 -0
  349. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/template.py +0 -0
  350. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/__init__.py +0 -0
  351. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/schema.py +0 -0
  352. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/step_efficiency.py +0 -0
  353. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/template.py +0 -0
  354. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/__init__.py +0 -0
  355. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/schema.py +0 -0
  356. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/summarization.py +0 -0
  357. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/template.py +0 -0
  358. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/__init__.py +0 -0
  359. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/schema.py +0 -0
  360. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/task_completion.py +0 -0
  361. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/template.py +0 -0
  362. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/__init__.py +0 -0
  363. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/schema.py +0 -0
  364. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/template.py +0 -0
  365. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
  366. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/__init__.py +0 -0
  367. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/schema.py +0 -0
  368. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/template.py +0 -0
  369. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/tool_use.py +0 -0
  370. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/__init__.py +0 -0
  371. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/schema.py +0 -0
  372. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/template.py +0 -0
  373. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/topic_adherence.py +0 -0
  374. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/__init__.py +0 -0
  375. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/schema.py +0 -0
  376. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/template.py +0 -0
  377. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/toxicity.py +0 -0
  378. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/__init__.py +0 -0
  379. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/schema.py +0 -0
  380. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/template.py +0 -0
  381. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +0 -0
  382. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/__init__.py +0 -0
  383. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/schema.py +0 -0
  384. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/template.py +0 -0
  385. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +0 -0
  386. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/__init__.py +0 -0
  387. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/schema.py +0 -0
  388. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/template.py +0 -0
  389. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +0 -0
  390. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/__init__.py +0 -0
  391. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/schema.py +0 -0
  392. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/template.py +0 -0
  393. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/turn_faithfulness.py +0 -0
  394. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
  395. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/schema.py +0 -0
  396. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/template.py +0 -0
  397. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
  398. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/model_integrations/__init__.py +0 -0
  399. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/model_integrations/types.py +0 -0
  400. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/model_integrations/utils.py +0 -0
  401. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/__init__.py +0 -0
  402. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/_summac_model.py +0 -0
  403. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/answer_relevancy_model.py +0 -0
  404. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/base_model.py +0 -0
  405. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/detoxify_model.py +0 -0
  406. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/__init__.py +0 -0
  407. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
  408. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
  409. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
  410. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
  411. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/hallucination_model.py +0 -0
  412. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/__init__.py +0 -0
  413. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/anthropic_model.py +0 -0
  414. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/constants.py +0 -0
  415. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/deepseek_model.py +0 -0
  416. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/grok_model.py +0 -0
  417. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/kimi_model.py +0 -0
  418. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/litellm_model.py +0 -0
  419. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/local_model.py +0 -0
  420. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/ollama_model.py +0 -0
  421. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/openai_model.py +0 -0
  422. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/openrouter_model.py +0 -0
  423. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/portkey_model.py +0 -0
  424. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/utils.py +0 -0
  425. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/retry_policy.py +0 -0
  426. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/summac_model.py +0 -0
  427. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/unbias_model.py +0 -0
  428. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/utils.py +0 -0
  429. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/__init__.py +0 -0
  430. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/extractors.py +0 -0
  431. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/patch.py +0 -0
  432. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/utils.py +0 -0
  433. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/__init__.py +0 -0
  434. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/agent.py +0 -0
  435. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/callback_handler.py +0 -0
  436. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/extractors.py +0 -0
  437. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/patch.py +0 -0
  438. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/runner.py +0 -0
  439. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/__init__.py +0 -0
  440. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/__init__.py +0 -0
  441. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/base.py +0 -0
  442. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/configs.py +0 -0
  443. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/copro/__init__.py +0 -0
  444. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/copro/copro.py +0 -0
  445. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/gepa/__init__.py +0 -0
  446. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/gepa/gepa.py +0 -0
  447. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/__init__.py +0 -0
  448. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/bootstrapper.py +0 -0
  449. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/miprov2.py +0 -0
  450. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/proposer.py +0 -0
  451. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/__init__.py +0 -0
  452. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/simba.py +0 -0
  453. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/types.py +0 -0
  454. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/configs.py +0 -0
  455. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/policies.py +0 -0
  456. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/prompt_optimizer.py +0 -0
  457. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/rewriter/__init__.py +0 -0
  458. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/rewriter/rewriter.py +0 -0
  459. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/rewriter/utils.py +0 -0
  460. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/__init__.py +0 -0
  461. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/base.py +0 -0
  462. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/scorer.py +0 -0
  463. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/utils.py +0 -0
  464. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/types.py +0 -0
  465. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/utils.py +0 -0
  466. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/plugins/__init__.py +0 -0
  467. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/plugins/plugin.py +0 -0
  468. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/progress_context.py +0 -0
  469. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/__init__.py +0 -0
  470. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/api.py +0 -0
  471. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/utils.py +0 -0
  472. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/py.typed +0 -0
  473. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/red_teaming/README.md +0 -0
  474. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/scorer/__init__.py +0 -0
  475. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/scorer/scorer.py +0 -0
  476. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/__init__.py +0 -0
  477. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/conversation_simulator.py +0 -0
  478. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/schema.py +0 -0
  479. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/template.py +0 -0
  480. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/singleton.py +0 -0
  481. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/__init__.py +0 -0
  482. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/base_synthesizer.py +0 -0
  483. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/chunking/__init__.py +0 -0
  484. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/chunking/context_generator.py +0 -0
  485. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
  486. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/config.py +0 -0
  487. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/schema.py +0 -0
  488. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/synthesizer.py +0 -0
  489. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/__init__.py +0 -0
  490. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/template.py +0 -0
  491. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/template_extraction.py +0 -0
  492. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/template_prompt.py +0 -0
  493. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/types.py +0 -0
  494. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/utils.py +0 -0
  495. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/telemetry.py +0 -0
  496. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/__init__.py +0 -0
  497. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/api.py +0 -0
  498. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/arena_test_case.py +0 -0
  499. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/conversational_test_case.py +0 -0
  500. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/mcp.py +0 -0
  501. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/utils.py +0 -0
  502. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/__init__.py +0 -0
  503. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/api.py +0 -0
  504. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/cache.py +0 -0
  505. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/hooks.py +0 -0
  506. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/hyperparameters.py +0 -0
  507. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/test_run.py +0 -0
  508. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/__init__.py +0 -0
  509. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/api.py +0 -0
  510. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/context.py +0 -0
  511. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/__init__.py +0 -0
  512. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/api.py +0 -0
  513. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/span.py +0 -0
  514. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/thread.py +0 -0
  515. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/trace.py +0 -0
  516. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/__init__.py +0 -0
  517. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/exporter.py +0 -0
  518. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/test_exporter.py +0 -0
  519. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/utils.py +0 -0
  520. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/patchers.py +0 -0
  521. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/perf_epoch_bridge.py +0 -0
  522. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/trace_context.py +0 -0
  523. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/trace_test_manager.py +0 -0
  524. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/tracing.py +0 -0
  525. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/types.py +0 -0
  526. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/utils.py +0 -0
  527. {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.8.0
3
+ Version: 3.8.1
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -0,0 +1 @@
1
+ __version__: str = "3.8.1"
@@ -14,7 +14,7 @@ def send_annotation(
14
14
  explanation: Optional[str] = None,
15
15
  user_id: Optional[str] = None,
16
16
  type: Optional[AnnotationType] = AnnotationType.THUMBS_RATING,
17
- ) -> str:
17
+ ) -> None:
18
18
  api_annotation = APIAnnotation(
19
19
  rating=rating,
20
20
  traceUuid=trace_uuid,
@@ -50,7 +50,7 @@ async def a_send_annotation(
50
50
  explanation: Optional[str] = None,
51
51
  type: Optional[AnnotationType] = AnnotationType.THUMBS_RATING,
52
52
  user_id: Optional[str] = None,
53
- ) -> str:
53
+ ) -> None:
54
54
  api_annotation = APIAnnotation(
55
55
  rating=rating,
56
56
  traceUuid=trace_uuid,
@@ -447,6 +447,9 @@ class Settings(BaseSettings):
447
447
  AZURE_OPENAI_API_KEY: Optional[SecretStr] = Field(
448
448
  None, description="Azure OpenAI API key."
449
449
  )
450
+ AZURE_OPENAI_AD_TOKEN: Optional[SecretStr] = Field(
451
+ None, description="Azure OpenAI Ad Token."
452
+ )
450
453
  AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = Field(
451
454
  None, description="Azure OpenAI endpoint URL."
452
455
  )
@@ -84,9 +84,11 @@ class EvaluationDataset:
84
84
  def __init__(
85
85
  self,
86
86
  goldens: Union[List[Golden], List[ConversationalGolden]] = [],
87
+ confident_api_key: Optional[str] = None,
87
88
  ):
88
89
  self._alias = None
89
90
  self._id = None
91
+ self.confident_api_key = confident_api_key
90
92
  if len(goldens) > 0:
91
93
  self._multi_turn = (
92
94
  True if isinstance(goldens[0], ConversationalGolden) else False
@@ -722,7 +724,7 @@ class EvaluationDataset:
722
724
  "Unable to push empty dataset to Confident AI, there must be at least one golden in dataset."
723
725
  )
724
726
 
725
- api = Api()
727
+ api = Api(api_key=self.confident_api_key)
726
728
  api_dataset = APIDataset(
727
729
  goldens=self.goldens if not self._multi_turn else None,
728
730
  conversationalGoldens=(self.goldens if self._multi_turn else None),
@@ -755,7 +757,7 @@ class EvaluationDataset:
755
757
  auto_convert_goldens_to_test_cases: bool = False,
756
758
  public: bool = False,
757
759
  ):
758
- api = Api()
760
+ api = Api(api_key=self.confident_api_key)
759
761
  with capture_pull_dataset():
760
762
  with Progress(
761
763
  SpinnerColumn(style="rgb(106,0,255)"),
@@ -839,7 +841,7 @@ class EvaluationDataset:
839
841
  raise ValueError(
840
842
  f"Can't queue empty list of goldens to dataset with alias: {alias} on Confident AI."
841
843
  )
842
- api = Api()
844
+ api = Api(api_key=self.confident_api_key)
843
845
 
844
846
  multi_turn = isinstance(goldens[0], ConversationalGolden)
845
847
 
@@ -871,7 +873,7 @@ class EvaluationDataset:
871
873
  self,
872
874
  alias: str,
873
875
  ):
874
- api = Api()
876
+ api = Api(api_key=self.confident_api_key)
875
877
  api.send_request(
876
878
  method=HttpMethods.DELETE,
877
879
  endpoint=Endpoints.DATASET_ALIAS_ENDPOINT,
@@ -539,4 +539,4 @@ class CallbackHandler(BaseCallbackHandler):
539
539
  with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
540
540
  retriever_span.status = TraceSpanStatus.ERRORED
541
541
  retriever_span.error = str(error)
542
- exit_current_context(uuid_str=uuid_str)
542
+ exit_current_context(uuid_str=uuid_str)
@@ -23,6 +23,7 @@ from deepeval.metrics.contextual_recall.schema import (
23
23
  ContextualRecallVerdict,
24
24
  Verdicts,
25
25
  ContextualRecallScoreReason,
26
+ VerdictWithExpectedOutput,
26
27
  )
27
28
  from deepeval.metrics.api import metric_data_manager
28
29
 
@@ -93,7 +94,7 @@ class ContextualRecallMetric(BaseMetric):
93
94
  expected_output = test_case.expected_output
94
95
  retrieval_context = test_case.retrieval_context
95
96
 
96
- self.verdicts: List[ContextualRecallVerdict] = (
97
+ self.verdicts: List[VerdictWithExpectedOutput] = (
97
98
  self._generate_verdicts(
98
99
  expected_output, retrieval_context, multimodal
99
100
  )
@@ -144,7 +145,7 @@ class ContextualRecallMetric(BaseMetric):
144
145
  expected_output = test_case.expected_output
145
146
  retrieval_context = test_case.retrieval_context
146
147
 
147
- self.verdicts: List[ContextualRecallVerdict] = (
148
+ self.verdicts: List[VerdictWithExpectedOutput] = (
148
149
  await self._a_generate_verdicts(
149
150
  expected_output, retrieval_context, multimodal
150
151
  )
@@ -241,13 +242,13 @@ class ContextualRecallMetric(BaseMetric):
241
242
  expected_output: str,
242
243
  retrieval_context: List[str],
243
244
  multimodal: bool,
244
- ) -> List[ContextualRecallVerdict]:
245
+ ) -> List[VerdictWithExpectedOutput]:
245
246
  prompt = self.evaluation_template.generate_verdicts(
246
247
  expected_output=expected_output,
247
248
  retrieval_context=retrieval_context,
248
249
  multimodal=multimodal,
249
250
  )
250
- return await a_generate_with_schema_and_extract(
251
+ verdicts = await a_generate_with_schema_and_extract(
251
252
  metric=self,
252
253
  prompt=prompt,
253
254
  schema_cls=Verdicts,
@@ -256,19 +257,28 @@ class ContextualRecallMetric(BaseMetric):
256
257
  ContextualRecallVerdict(**item) for item in data["verdicts"]
257
258
  ],
258
259
  )
260
+ final_verdicts = []
261
+ for verdict in verdicts:
262
+ new_verdict = VerdictWithExpectedOutput(
263
+ verdict=verdict.verdict,
264
+ reason=verdict.reason,
265
+ expected_output=expected_output,
266
+ )
267
+ final_verdicts.append(new_verdict)
268
+ return final_verdicts
259
269
 
260
270
  def _generate_verdicts(
261
271
  self,
262
272
  expected_output: str,
263
273
  retrieval_context: List[str],
264
274
  multimodal: bool,
265
- ) -> List[ContextualRecallVerdict]:
275
+ ) -> List[VerdictWithExpectedOutput]:
266
276
  prompt = self.evaluation_template.generate_verdicts(
267
277
  expected_output=expected_output,
268
278
  retrieval_context=retrieval_context,
269
279
  multimodal=multimodal,
270
280
  )
271
- return generate_with_schema_and_extract(
281
+ verdicts = generate_with_schema_and_extract(
272
282
  metric=self,
273
283
  prompt=prompt,
274
284
  schema_cls=Verdicts,
@@ -277,6 +287,15 @@ class ContextualRecallMetric(BaseMetric):
277
287
  ContextualRecallVerdict(**item) for item in data["verdicts"]
278
288
  ],
279
289
  )
290
+ final_verdicts = []
291
+ for verdict in verdicts:
292
+ new_verdict = VerdictWithExpectedOutput(
293
+ verdict=verdict.verdict,
294
+ reason=verdict.reason,
295
+ expected_output=expected_output,
296
+ )
297
+ final_verdicts.append(new_verdict)
298
+ return final_verdicts
280
299
 
281
300
  def is_successful(self) -> bool:
282
301
  if self.error is not None:
@@ -7,6 +7,12 @@ class ContextualRecallVerdict(BaseModel):
7
7
  reason: str
8
8
 
9
9
 
10
+ class VerdictWithExpectedOutput(BaseModel):
11
+ verdict: str
12
+ reason: str
13
+ expected_output: str
14
+
15
+
10
16
  class Verdicts(BaseModel):
11
17
  verdicts: List[ContextualRecallVerdict]
12
18
 
@@ -85,7 +85,12 @@ class ImageCoherenceMetric(BaseMetric):
85
85
  self.contexts_below = []
86
86
  self.scores = []
87
87
  self.reasons = []
88
- for image_index in self.get_image_indices(actual_output):
88
+ image_indices = self.get_image_indices(actual_output)
89
+ if not image_indices:
90
+ raise ValueError(
91
+ f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
92
+ )
93
+ for image_index in image_indices:
89
94
  context_above, context_below = self.get_image_context(
90
95
  image_index, actual_output
91
96
  )
@@ -188,6 +193,10 @@ class ImageCoherenceMetric(BaseMetric):
188
193
 
189
194
  tasks = []
190
195
  image_indices = self.get_image_indices(actual_output)
196
+ if not image_indices:
197
+ raise ValueError(
198
+ f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
199
+ )
191
200
  for image_index in image_indices:
192
201
  context_above, context_below = self.get_image_context(
193
202
  image_index, actual_output
@@ -86,7 +86,12 @@ class ImageHelpfulnessMetric(BaseMetric):
86
86
  self.contexts_below = []
87
87
  self.scores = []
88
88
  self.reasons = []
89
- for image_index in self.get_image_indices(actual_output):
89
+ image_indices = self.get_image_indices(actual_output)
90
+ if not image_indices:
91
+ raise ValueError(
92
+ f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
93
+ )
94
+ for image_index in image_indices:
90
95
  context_above, context_below = self.get_image_context(
91
96
  image_index, actual_output
92
97
  )
@@ -189,6 +194,10 @@ class ImageHelpfulnessMetric(BaseMetric):
189
194
 
190
195
  tasks = []
191
196
  image_indices = self.get_image_indices(actual_output)
197
+ if not image_indices:
198
+ raise ValueError(
199
+ f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
200
+ )
192
201
  for image_index in image_indices:
193
202
  context_above, context_below = self.get_image_context(
194
203
  image_index, actual_output
@@ -86,7 +86,12 @@ class ImageReferenceMetric(BaseMetric):
86
86
  self.contexts_below = []
87
87
  self.scores = []
88
88
  self.reasons = []
89
- for image_index in self.get_image_indices(actual_output):
89
+ image_indices = self.get_image_indices(actual_output)
90
+ if not image_indices:
91
+ raise ValueError(
92
+ f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
93
+ )
94
+ for image_index in image_indices:
90
95
  context_above, context_below = self.get_image_context(
91
96
  image_index, actual_output
92
97
  )
@@ -189,6 +194,10 @@ class ImageReferenceMetric(BaseMetric):
189
194
 
190
195
  tasks = []
191
196
  image_indices = self.get_image_indices(actual_output)
197
+ if not image_indices:
198
+ raise ValueError(
199
+ f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
200
+ )
192
201
  for image_index in image_indices:
193
202
  context_above, context_below = self.get_image_context(
194
203
  image_index, actual_output
@@ -312,7 +312,7 @@ def check_llm_test_case_params(
312
312
  if isinstance(ele, MLLMImage):
313
313
  count += 1
314
314
  if count != actual_output_image_count:
315
- error_str = f"Unable to evaluate test cases with '{actual_output_image_count}' output images using the '{metric.__name__}' metric. `{count}` found."
315
+ error_str = f"Can only evaluate test cases with '{actual_output_image_count}' output images using the '{metric.__name__}' metric. `{count}` found."
316
316
  raise ValueError(error_str)
317
317
 
318
318
  if isinstance(test_case, LLMTestCase) is False:
@@ -14,6 +14,7 @@ from deepeval.models.retry_policy import (
14
14
  sdk_retries_for,
15
15
  )
16
16
  from deepeval.test_case import MLLMImage
17
+ from deepeval.errors import DeepEvalError
17
18
  from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
18
19
  from deepeval.models import DeepEvalBaseLLM
19
20
  from deepeval.models.llms.constants import BEDROCK_MODELS_DATA
@@ -155,27 +156,28 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
155
156
 
156
157
  def generate(
157
158
  self, prompt: str, schema: Optional[BaseModel] = None
158
- ) -> Tuple[Union[str, BaseModel], float]:
159
+ ) -> Tuple[Union[str, BaseModel], Optional[float]]:
159
160
  return safe_asyncio_run(self.a_generate(prompt, schema))
160
161
 
161
162
  @retry_bedrock
162
163
  async def a_generate(
163
164
  self, prompt: str, schema: Optional[BaseModel] = None
164
- ) -> Tuple[Union[str, BaseModel], float]:
165
+ ) -> Tuple[Union[str, BaseModel], Optional[float]]:
165
166
  if check_if_multimodal(prompt):
166
167
  prompt = convert_to_multi_modal_array(input=prompt)
167
168
  payload = self.generate_payload(prompt)
168
169
  else:
169
170
  payload = self.get_converse_request_body(prompt)
170
171
 
171
- payload = self.get_converse_request_body(prompt)
172
172
  client = await self._ensure_client()
173
173
  response = await client.converse(
174
174
  modelId=self.get_model_name(),
175
175
  messages=payload["messages"],
176
176
  inferenceConfig=payload["inferenceConfig"],
177
177
  )
178
- message = response["output"]["message"]["content"][0]["text"]
178
+
179
+ message = self._extract_text_from_converse_response(response)
180
+
179
181
  cost = self.calculate_cost(
180
182
  response["usage"]["inputTokens"],
181
183
  response["usage"]["outputTokens"],
@@ -206,7 +208,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
206
208
  try:
207
209
  image_raw_bytes = base64.b64decode(element.dataBase64)
208
210
  except Exception:
209
- raise ValueError(
211
+ raise DeepEvalError(
210
212
  f"Invalid base64 data in MLLMImage: {element._id}"
211
213
  )
212
214
 
@@ -294,6 +296,46 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
294
296
  # Helpers
295
297
  ###############################################
296
298
 
299
+ @staticmethod
300
+ def _extract_text_from_converse_response(response: dict) -> str:
301
+ try:
302
+ content = response["output"]["message"]["content"]
303
+ except Exception as e:
304
+ raise DeepEvalError(
305
+ "Missing output.message.content in Bedrock response"
306
+ ) from e
307
+
308
+ # Collect any text blocks (ignore reasoning/tool blocks)
309
+ text_parts = []
310
+ for block in content:
311
+ if isinstance(block, dict) and "text" in block:
312
+ v = block.get("text")
313
+ if isinstance(v, str) and v.strip():
314
+ text_parts.append(v)
315
+
316
+ if text_parts:
317
+ # join in case there are multiple text blocks
318
+ return "\n".join(text_parts)
319
+
320
+ # No text blocks present; raise an actionable error
321
+ keys = []
322
+ for b in content:
323
+ if isinstance(b, dict):
324
+ keys.append(list(b.keys()))
325
+ else:
326
+ keys.append(type(b).__name__)
327
+
328
+ stop_reason = (
329
+ response.get("stopReason")
330
+ or response.get("output", {}).get("stopReason")
331
+ or response.get("output", {}).get("message", {}).get("stopReason")
332
+ )
333
+
334
+ raise DeepEvalError(
335
+ f"Bedrock response contained no text content blocks. "
336
+ f"content keys={keys}, stopReason={stop_reason}"
337
+ )
338
+
297
339
  def get_converse_request_body(self, prompt: str) -> dict:
298
340
 
299
341
  return {
@@ -303,11 +345,14 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
303
345
  },
304
346
  }
305
347
 
306
- def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
348
+ def calculate_cost(
349
+ self, input_tokens: int, output_tokens: int
350
+ ) -> Optional[float]:
307
351
  if self.model_data.input_price and self.model_data.output_price:
308
352
  input_cost = input_tokens * self.model_data.input_price
309
353
  output_cost = output_tokens * self.model_data.output_price
310
354
  return input_cost + output_cost
355
+ return None
311
356
 
312
357
  def load_model(self):
313
358
  pass
@@ -1,6 +1,6 @@
1
1
  from openai.types.chat.chat_completion import ChatCompletion
2
2
  from openai import AzureOpenAI, AsyncAzureOpenAI
3
- from typing import Optional, Tuple, Union, Dict, List
3
+ from typing import Optional, Tuple, Union, Dict, List, Callable, Awaitable
4
4
  from pydantic import BaseModel, SecretStr
5
5
 
6
6
  from deepeval.errors import DeepEvalError
@@ -42,6 +42,10 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
42
42
  model: Optional[str] = None,
43
43
  api_key: Optional[str] = None,
44
44
  base_url: Optional[str] = None,
45
+ azure_ad_token_provider: Optional[
46
+ Callable[[], "str | Awaitable[str]"]
47
+ ] = None,
48
+ azure_ad_token: Optional[str] = None,
45
49
  temperature: Optional[float] = None,
46
50
  cost_per_input_token: Optional[float] = None,
47
51
  cost_per_output_token: Optional[float] = None,
@@ -67,12 +71,19 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
67
71
  model = model or settings.AZURE_MODEL_NAME
68
72
  deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
69
73
 
74
+ self.azure_ad_token_provider = azure_ad_token_provider
75
+
70
76
  if api_key is not None:
71
77
  # keep it secret, keep it safe from serializings, logging and alike
72
78
  self.api_key: Optional[SecretStr] = SecretStr(api_key)
73
79
  else:
74
80
  self.api_key = settings.AZURE_OPENAI_API_KEY
75
81
 
82
+ if azure_ad_token is not None:
83
+ self.azure_ad_token = azure_ad_token
84
+ else:
85
+ self.azure_ad_token = settings.AZURE_OPENAI_AD_TOKEN
86
+
76
87
  api_version = api_version or settings.OPENAI_API_VERSION
77
88
  if base_url is not None:
78
89
  base_url = str(base_url).rstrip("/")
@@ -431,18 +442,33 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
431
442
  return kwargs
432
443
 
433
444
  def _build_client(self, cls):
434
- api_key = require_secret_api_key(
435
- self.api_key,
436
- provider_label="AzureOpenAI",
437
- env_var_name="AZURE_OPENAI_API_KEY",
438
- param_hint="`api_key` to AzureOpenAIModel(...)",
439
- )
445
+ # Only require the API key / Azure ad token if no token provider is supplied
446
+ azure_ad_token = None
447
+ api_key = None
448
+
449
+ if self.azure_ad_token_provider is None:
450
+ if self.azure_ad_token is not None:
451
+ azure_ad_token = require_secret_api_key(
452
+ self.azure_ad_token,
453
+ provider_label="AzureOpenAI",
454
+ env_var_name="AZURE_OPENAI_AD_TOKEN",
455
+ param_hint="`azure_ad_token` to AzureOpenAIModel(...)",
456
+ )
457
+ else:
458
+ api_key = require_secret_api_key(
459
+ self.api_key,
460
+ provider_label="AzureOpenAI",
461
+ env_var_name="AZURE_OPENAI_API_KEY",
462
+ param_hint="`api_key` to AzureOpenAIModel(...)",
463
+ )
440
464
 
441
465
  kw = dict(
442
466
  api_key=api_key,
443
467
  api_version=self.api_version,
444
468
  azure_endpoint=self.base_url,
445
469
  azure_deployment=self.deployment_name,
470
+ azure_ad_token_provider=self.azure_ad_token_provider,
471
+ azure_ad_token=azure_ad_token,
446
472
  **self._client_kwargs(),
447
473
  )
448
474
  try:
@@ -65,6 +65,7 @@ class GeminiModel(DeepEvalBaseLLM):
65
65
  project: Optional[str] = None,
66
66
  location: Optional[str] = None,
67
67
  service_account_key: Optional[Union[str, Dict[str, str]]] = None,
68
+ use_vertexai: Optional[bool] = None,
68
69
  generation_kwargs: Optional[Dict] = None,
69
70
  **kwargs,
70
71
  ):
@@ -93,7 +94,11 @@ class GeminiModel(DeepEvalBaseLLM):
93
94
  location if location is not None else settings.GOOGLE_CLOUD_LOCATION
94
95
  )
95
96
  self.location = str(location).strip() if location is not None else None
96
- self.use_vertexai = settings.GOOGLE_GENAI_USE_VERTEXAI
97
+ self.use_vertexai = (
98
+ use_vertexai
99
+ if use_vertexai is not None
100
+ else settings.GOOGLE_GENAI_USE_VERTEXAI
101
+ )
97
102
 
98
103
  self.service_account_key: Optional[SecretStr] = None
99
104
  if service_account_key is None:
@@ -114,6 +114,7 @@ class Prompt:
114
114
  output_type: Optional[OutputType] = None,
115
115
  output_schema: Optional[Type[BaseModel]] = None,
116
116
  interpolation_type: Optional[PromptInterpolationType] = None,
117
+ confident_api_key: Optional[str] = None,
117
118
  ):
118
119
  if text_template and messages_template:
119
120
  raise TypeError(
@@ -129,6 +130,7 @@ class Prompt:
129
130
  self.interpolation_type: PromptInterpolationType = (
130
131
  interpolation_type or PromptInterpolationType.FSTRING
131
132
  )
133
+ self.confident_api_key = confident_api_key
132
134
 
133
135
  self._version = None
134
136
  self._prompt_version_id: Optional[str] = None
@@ -244,7 +246,7 @@ class Prompt:
244
246
  raise ValueError(
245
247
  "Prompt alias is not set. Please set an alias to continue."
246
248
  )
247
- api = Api()
249
+ api = Api(api_key=self.confident_api_key)
248
250
  data, _ = api.send_request(
249
251
  method=HttpMethods.GET,
250
252
  endpoint=Endpoints.PROMPTS_VERSIONS_ENDPOINT,
@@ -496,7 +498,7 @@ class Prompt:
496
498
  except Exception:
497
499
  pass
498
500
 
499
- api = Api()
501
+ api = Api(api_key=self.confident_api_key)
500
502
  with Progress(
501
503
  SpinnerColumn(style="rgb(106,0,255)"),
502
504
  BarColumn(bar_width=60),
@@ -635,7 +637,7 @@ class Prompt:
635
637
  # Pydantic version below 2.0
636
638
  body = body.dict(by_alias=True, exclude_none=True)
637
639
 
638
- api = Api()
640
+ api = Api(api_key=self.confident_api_key)
639
641
  _, link = api.send_request(
640
642
  method=HttpMethods.POST,
641
643
  endpoint=Endpoints.PROMPTS_ENDPOINT,
@@ -692,7 +694,7 @@ class Prompt:
692
694
  )
693
695
  except AttributeError:
694
696
  body = body.dict(by_alias=True, exclude_none=True)
695
- api = Api()
697
+ api = Api(api_key=self.confident_api_key)
696
698
  data, _ = api.send_request(
697
699
  method=HttpMethods.PUT,
698
700
  endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
@@ -765,7 +767,7 @@ class Prompt:
765
767
  while True:
766
768
  await asyncio.sleep(self._refresh_map[CACHE_KEY][cache_value])
767
769
 
768
- api = Api()
770
+ api = Api(api_key=self.confident_api_key)
769
771
  try:
770
772
  if label:
771
773
  data, _ = api.send_request(
@@ -386,6 +386,7 @@ class LLMTestCase(BaseModel):
386
386
  [
387
387
  re.search(pattern, self.input or "") is not None,
388
388
  re.search(pattern, self.actual_output or "") is not None,
389
+ re.search(pattern, self.expected_output or "") is not None,
389
390
  ]
390
391
  )
391
392
  if isinstance(self.input, str)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "deepeval"
3
- version = "3.8.0"
3
+ version = "3.8.1"
4
4
  description = "The LLM Evaluation Framework"
5
5
  authors = ["Jeffrey Ip <jeffreyip@confident-ai.com>"]
6
6
  license = "Apache-2.0"
@@ -1 +0,0 @@
1
- __version__: str = "3.8.0"
File without changes
File without changes
File without changes