deepeval 3.4.3__tar.gz → 3.4.5__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (452)
  1. {deepeval-3.4.3 → deepeval-3.4.5}/PKG-INFO +1 -1
  2. deepeval-3.4.5/deepeval/_version.py +1 -0
  3. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +2 -1
  4. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/confident/api.py +2 -2
  5. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/dataset/api.py +1 -2
  6. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/dataset/dataset.py +22 -8
  7. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/simulator/conversation_simulator.py +2 -3
  8. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/simulator/template.py +3 -2
  9. deepeval-3.4.5/deepeval/test_case/conversational_test_case.py +192 -0
  10. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_case/llm_test_case.py +124 -57
  11. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/context.py +0 -1
  12. {deepeval-3.4.3 → deepeval-3.4.5}/pyproject.toml +1 -1
  13. deepeval-3.4.3/deepeval/_version.py +0 -1
  14. deepeval-3.4.3/deepeval/test_case/conversational_test_case.py +0 -141
  15. {deepeval-3.4.3 → deepeval-3.4.5}/LICENSE.md +0 -0
  16. {deepeval-3.4.3 → deepeval-3.4.5}/README.md +0 -0
  17. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/__init__.py +0 -0
  18. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/annotation/__init__.py +0 -0
  19. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/annotation/annotation.py +0 -0
  20. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/annotation/api.py +0 -0
  21. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/__init__.py +0 -0
  22. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/arc/__init__.py +0 -0
  23. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/arc/arc.py +0 -0
  24. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/arc/mode.py +0 -0
  25. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/arc/template.py +0 -0
  26. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/base_benchmark.py +0 -0
  27. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/bbq/__init__.py +0 -0
  28. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/bbq/bbq.py +0 -0
  29. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/bbq/task.py +0 -0
  30. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/bbq/template.py +0 -0
  31. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
  32. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
  33. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
  34. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
  35. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
  36. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
  37. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
  38. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
  39. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
  40. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
  41. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
  42. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
  43. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
  44. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
  45. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
  46. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
  47. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
  48. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
  49. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
  50. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
  51. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
  52. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
  53. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
  54. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
  55. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
  56. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  57. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  58. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  59. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
  60. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
  61. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
  62. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
  63. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
  64. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
  65. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
  66. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
  67. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
  68. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
  69. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
  70. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
  71. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
  72. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
  73. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
  74. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
  75. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
  76. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
  77. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
  78. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
  79. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
  80. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
  81. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
  82. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
  83. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
  84. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  85. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  86. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  87. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
  88. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
  89. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
  90. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
  91. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/bool_q/__init__.py +0 -0
  92. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
  93. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/bool_q/template.py +0 -0
  94. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/drop/__init__.py +0 -0
  95. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/drop/drop.py +0 -0
  96. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/drop/task.py +0 -0
  97. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/drop/template.py +0 -0
  98. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
  99. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
  100. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
  101. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
  102. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
  103. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/gsm8k/template.py +0 -0
  104. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
  105. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
  106. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/hellaswag/task.py +0 -0
  107. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/hellaswag/template.py +0 -0
  108. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/human_eval/__init__.py +0 -0
  109. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
  110. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/human_eval/task.py +0 -0
  111. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/human_eval/template.py +0 -0
  112. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/ifeval/__init__.py +0 -0
  113. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
  114. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/ifeval/template.py +0 -0
  115. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/lambada/__init__.py +0 -0
  116. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/lambada/lambada.py +0 -0
  117. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/lambada/template.py +0 -0
  118. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
  119. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
  120. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/logi_qa/task.py +0 -0
  121. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/logi_qa/template.py +0 -0
  122. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/math_qa/__init__.py +0 -0
  123. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
  124. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/math_qa/task.py +0 -0
  125. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/math_qa/template.py +0 -0
  126. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/mmlu/__init__.py +0 -0
  127. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
  128. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/mmlu/task.py +0 -0
  129. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/mmlu/template.py +0 -0
  130. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/modes/__init__.py +0 -0
  131. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/results.py +0 -0
  132. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/schema.py +0 -0
  133. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/squad/__init__.py +0 -0
  134. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/squad/squad.py +0 -0
  135. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/squad/task.py +0 -0
  136. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/squad/template.py +0 -0
  137. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/tasks/__init__.py +0 -0
  138. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
  139. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
  140. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/truthful_qa/task.py +0 -0
  141. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/truthful_qa/template.py +0 -0
  142. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
  143. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/utils.py +0 -0
  144. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/winogrande/__init__.py +0 -0
  145. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/winogrande/template.py +0 -0
  146. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
  147. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/cli/__init__.py +0 -0
  148. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/cli/main.py +0 -0
  149. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/cli/server.py +0 -0
  150. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/cli/test.py +0 -0
  151. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/cli/types.py +0 -0
  152. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/cli/utils.py +0 -0
  153. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/confident/__init__.py +0 -0
  154. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/confident/types.py +0 -0
  155. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/constants.py +0 -0
  156. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/dataset/__init__.py +0 -0
  157. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/dataset/golden.py +0 -0
  158. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/dataset/types.py +0 -0
  159. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/dataset/utils.py +0 -0
  160. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/errors.py +0 -0
  161. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/evaluate/__init__.py +0 -0
  162. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/evaluate/api.py +0 -0
  163. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/evaluate/compare.py +0 -0
  164. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/evaluate/configs.py +0 -0
  165. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/evaluate/evaluate.py +0 -0
  166. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/evaluate/execute.py +0 -0
  167. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/evaluate/types.py +0 -0
  168. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/evaluate/utils.py +0 -0
  169. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/__init__.py +0 -0
  170. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/crewai/__init__.py +0 -0
  171. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/crewai/agent.py +0 -0
  172. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/crewai/handler.py +0 -0
  173. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/crewai/patch.py +0 -0
  174. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/hugging_face/__init__.py +0 -0
  175. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/hugging_face/callback.py +0 -0
  176. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
  177. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
  178. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/hugging_face/utils.py +0 -0
  179. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/langchain/__init__.py +0 -0
  180. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/langchain/callback.py +0 -0
  181. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/langchain/utils.py +0 -0
  182. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/llama_index/__init__.py +0 -0
  183. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/llama_index/agent/patched.py +0 -0
  184. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/llama_index/handler.py +0 -0
  185. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/llama_index/utils.py +0 -0
  186. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
  187. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/pydantic_ai/agent.py +0 -0
  188. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/integrations/pydantic_ai/setup.py +0 -0
  189. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/key_handler.py +0 -0
  190. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/__init__.py +0 -0
  191. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
  192. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
  193. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/answer_relevancy/schema.py +0 -0
  194. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/answer_relevancy/template.py +0 -0
  195. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
  196. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
  197. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/arena_g_eval/schema.py +0 -0
  198. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/arena_g_eval/template.py +0 -0
  199. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/arena_g_eval/utils.py +0 -0
  200. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/argument_correctness/__init__.py +0 -0
  201. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
  202. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/argument_correctness/schema.py +0 -0
  203. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/argument_correctness/template.py +0 -0
  204. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/base_metric.py +0 -0
  205. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/bias/__init__.py +0 -0
  206. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/bias/bias.py +0 -0
  207. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/bias/schema.py +0 -0
  208. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/bias/template.py +0 -0
  209. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_precision/__init__.py +0 -0
  210. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
  211. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_precision/schema.py +0 -0
  212. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_precision/template.py +0 -0
  213. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_recall/__init__.py +0 -0
  214. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
  215. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_recall/schema.py +0 -0
  216. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_recall/template.py +0 -0
  217. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
  218. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
  219. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
  220. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/contextual_relevancy/template.py +0 -0
  221. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
  222. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
  223. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/conversation_completeness/schema.py +0 -0
  224. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/conversation_completeness/template.py +0 -0
  225. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
  226. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
  227. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
  228. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/conversational_g_eval/template.py +0 -0
  229. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/dag/__init__.py +0 -0
  230. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/dag/dag.py +0 -0
  231. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/dag/graph.py +0 -0
  232. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/dag/nodes.py +0 -0
  233. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/dag/schema.py +0 -0
  234. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/dag/templates.py +0 -0
  235. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/dag/utils.py +0 -0
  236. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/faithfulness/__init__.py +0 -0
  237. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
  238. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/faithfulness/schema.py +0 -0
  239. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/faithfulness/template.py +0 -0
  240. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/g_eval/__init__.py +0 -0
  241. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/g_eval/g_eval.py +0 -0
  242. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/g_eval/schema.py +0 -0
  243. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/g_eval/template.py +0 -0
  244. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/g_eval/utils.py +0 -0
  245. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/hallucination/__init__.py +0 -0
  246. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/hallucination/hallucination.py +0 -0
  247. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/hallucination/schema.py +0 -0
  248. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/hallucination/template.py +0 -0
  249. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/indicator.py +0 -0
  250. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/json_correctness/__init__.py +0 -0
  251. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
  252. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/json_correctness/schema.py +0 -0
  253. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/json_correctness/template.py +0 -0
  254. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
  255. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
  256. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/knowledge_retention/schema.py +0 -0
  257. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/knowledge_retention/template.py +0 -0
  258. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/mcp/__init__.py +0 -0
  259. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
  260. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
  261. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/mcp/schema.py +0 -0
  262. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/mcp/template.py +0 -0
  263. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
  264. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
  265. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
  266. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/mcp_use_metric/template.py +0 -0
  267. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/misuse/__init__.py +0 -0
  268. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/misuse/misuse.py +0 -0
  269. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/misuse/schema.py +0 -0
  270. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/misuse/template.py +0 -0
  271. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
  272. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
  273. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
  274. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
  275. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
  276. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
  277. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
  278. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
  279. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
  280. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
  281. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
  282. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
  283. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
  284. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
  285. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
  286. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
  287. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
  288. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
  289. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
  290. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
  291. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -0
  292. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
  293. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
  294. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
  295. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -0
  296. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
  297. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
  298. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
  299. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -0
  300. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
  301. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
  302. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
  303. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -0
  304. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  305. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
  306. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
  307. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -0
  308. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  309. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -0
  310. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
  311. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -0
  312. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
  313. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  314. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
  315. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
  316. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
  317. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
  318. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
  319. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/non_advice/__init__.py +0 -0
  320. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/non_advice/non_advice.py +0 -0
  321. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/non_advice/schema.py +0 -0
  322. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/non_advice/template.py +0 -0
  323. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/pii_leakage/__init__.py +0 -0
  324. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
  325. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/pii_leakage/schema.py +0 -0
  326. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/pii_leakage/template.py +0 -0
  327. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
  328. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
  329. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/prompt_alignment/schema.py +0 -0
  330. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/prompt_alignment/template.py +0 -0
  331. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/ragas.py +0 -0
  332. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/role_adherence/__init__.py +0 -0
  333. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
  334. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/role_adherence/schema.py +0 -0
  335. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/role_adherence/template.py +0 -0
  336. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/role_violation/__init__.py +0 -0
  337. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/role_violation/role_violation.py +0 -0
  338. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/role_violation/schema.py +0 -0
  339. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/role_violation/template.py +0 -0
  340. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/summarization/__init__.py +0 -0
  341. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/summarization/schema.py +0 -0
  342. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/summarization/summarization.py +0 -0
  343. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/summarization/template.py +0 -0
  344. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/task_completion/__init__.py +0 -0
  345. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/task_completion/schema.py +0 -0
  346. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/task_completion/task_completion.py +0 -0
  347. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/task_completion/template.py +0 -0
  348. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/tool_correctness/__init__.py +0 -0
  349. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
  350. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/toxicity/__init__.py +0 -0
  351. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/toxicity/schema.py +0 -0
  352. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/toxicity/template.py +0 -0
  353. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/toxicity/toxicity.py +0 -0
  354. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
  355. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/turn_relevancy/schema.py +0 -0
  356. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/turn_relevancy/template.py +0 -0
  357. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
  358. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/metrics/utils.py +0 -0
  359. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/__init__.py +0 -0
  360. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/_summac_model.py +0 -0
  361. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/answer_relevancy_model.py +0 -0
  362. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/base_model.py +0 -0
  363. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/detoxify_model.py +0 -0
  364. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/embedding_models/__init__.py +0 -0
  365. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
  366. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
  367. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
  368. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
  369. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/hallucination_model.py +0 -0
  370. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/__init__.py +0 -0
  371. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
  372. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/anthropic_model.py +0 -0
  373. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/azure_model.py +0 -0
  374. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/deepseek_model.py +0 -0
  375. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/gemini_model.py +0 -0
  376. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/grok_model.py +0 -0
  377. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/kimi_model.py +0 -0
  378. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/litellm_model.py +0 -0
  379. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/local_model.py +0 -0
  380. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/ollama_model.py +0 -0
  381. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/openai_model.py +0 -0
  382. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/llms/utils.py +0 -0
  383. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/mlllms/__init__.py +0 -0
  384. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/mlllms/gemini_model.py +0 -0
  385. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/mlllms/ollama_model.py +0 -0
  386. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/mlllms/openai_model.py +0 -0
  387. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/summac_model.py +0 -0
  388. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/unbias_model.py +0 -0
  389. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/models/utils.py +0 -0
  390. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/openai/__init__.py +0 -0
  391. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/openai/extractors.py +0 -0
  392. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/openai/patch.py +0 -0
  393. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/openai/utils.py +0 -0
  394. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/openai_agents/__init__.py +0 -0
  395. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/openai_agents/callback_handler.py +0 -0
  396. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/openai_agents/extractors.py +0 -0
  397. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/plugins/__init__.py +0 -0
  398. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/plugins/plugin.py +0 -0
  399. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/progress_context.py +0 -0
  400. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/prompt/__init__.py +0 -0
  401. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/prompt/api.py +0 -0
  402. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/prompt/prompt.py +0 -0
  403. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/prompt/utils.py +0 -0
  404. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/py.typed +0 -0
  405. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/red_teaming/README.md +0 -0
  406. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/scorer/__init__.py +0 -0
  407. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/scorer/scorer.py +0 -0
  408. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/simulator/__init__.py +0 -0
  409. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/simulator/schema.py +0 -0
  410. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/singleton.py +0 -0
  411. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/__init__.py +0 -0
  412. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/base_synthesizer.py +0 -0
  413. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/chunking/__init__.py +0 -0
  414. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/chunking/context_generator.py +0 -0
  415. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
  416. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/config.py +0 -0
  417. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/schema.py +0 -0
  418. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/synthesizer.py +0 -0
  419. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/templates/__init__.py +0 -0
  420. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/templates/template.py +0 -0
  421. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/templates/template_extraction.py +0 -0
  422. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/templates/template_prompt.py +0 -0
  423. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/types.py +0 -0
  424. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/synthesizer/utils.py +0 -0
  425. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/telemetry.py +0 -0
  426. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_case/__init__.py +0 -0
  427. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_case/arena_test_case.py +0 -0
  428. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_case/mcp.py +0 -0
  429. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_case/mllm_test_case.py +0 -0
  430. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_case/utils.py +0 -0
  431. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_run/__init__.py +0 -0
  432. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_run/api.py +0 -0
  433. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_run/cache.py +0 -0
  434. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_run/hooks.py +0 -0
  435. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_run/hyperparameters.py +0 -0
  436. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/test_run/test_run.py +0 -0
  437. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/__init__.py +0 -0
  438. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/api.py +0 -0
  439. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/offline_evals/__init__.py +0 -0
  440. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/offline_evals/api.py +0 -0
  441. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/offline_evals/span.py +0 -0
  442. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/offline_evals/thread.py +0 -0
  443. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/offline_evals/trace.py +0 -0
  444. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/otel/__init__.py +0 -0
  445. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/otel/exporter.py +0 -0
  446. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/otel/utils.py +0 -0
  447. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/patchers.py +0 -0
  448. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/perf_epoch_bridge.py +0 -0
  449. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/tracing.py +0 -0
  450. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/types.py +0 -0
  451. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/tracing/utils.py +0 -0
  452. {deepeval-3.4.3 → deepeval-3.4.5}/deepeval/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.4.3
3
+ Version: 3.4.5
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -0,0 +1 @@
1
+ __version__: str = "3.4.5"
@@ -119,7 +119,8 @@ class EquityMedQA(DeepEvalBaseBenchmark):
119
119
  strict_mode=True,
120
120
  )
121
121
  score = metric.measure(
122
- LLMTestCase(golden.input, prediction), _show_indicator=False
122
+ LLMTestCase(input=golden.input, actual_output=prediction),
123
+ _show_indicator=False,
123
124
  )
124
125
  flipped_score = (
125
126
  1 - metric.score if metric.score in [0, 1] else metric.score
@@ -67,8 +67,8 @@ class HttpMethods(Enum):
67
67
 
68
68
 
69
69
  class Endpoints(Enum):
70
- DATASET_ENDPOINT = "/v1/datasets"
71
- DATASET_QUEUE_ENDPOINT = "/v1/datasets/:alias/queue"
70
+ DATASET_ALIAS_ENDPOINT = "/v1/datasets/:alias"
71
+ DATASET_ALIAS_QUEUE_ENDPOINT = "/v1/datasets/:alias/queue"
72
72
 
73
73
  TEST_RUN_ENDPOINT = "/v1/test-run"
74
74
  TRACES_ENDPOINT = "/v1/traces"
@@ -5,8 +5,7 @@ from deepeval.dataset.golden import Golden, ConversationalGolden
5
5
 
6
6
 
7
7
  class APIDataset(BaseModel):
8
- alias: str
9
- overwrite: bool
8
+ finalized: bool
10
9
  goldens: Optional[List[Golden]] = Field(None)
11
10
  conversational_goldens: Optional[List[ConversationalGolden]] = Field(
12
11
  None, alias="conversationalGoldens"
@@ -46,6 +46,7 @@ from deepeval.test_run import (
46
46
  )
47
47
  from deepeval.dataset.types import global_evaluation_tasks
48
48
  from deepeval.openai.utils import openai_test_case_pairs
49
+ from deepeval.tracing import trace_manager
49
50
 
50
51
 
51
52
  valid_file_types = ["csv", "json", "jsonl"]
@@ -656,7 +657,7 @@ class EvaluationDataset:
656
657
  def push(
657
658
  self,
658
659
  alias: str,
659
- overwrite: bool = False,
660
+ finalized: bool = True,
660
661
  ):
661
662
  if len(self.goldens) == 0:
662
663
  raise ValueError(
@@ -665,10 +666,9 @@ class EvaluationDataset:
665
666
 
666
667
  api = Api()
667
668
  api_dataset = APIDataset(
668
- alias=alias,
669
- overwrite=overwrite,
670
669
  goldens=self.goldens if not self._multi_turn else None,
671
670
  conversationalGoldens=(self.goldens if self._multi_turn else None),
671
+ finalized=finalized,
672
672
  )
673
673
  try:
674
674
  body = api_dataset.model_dump(by_alias=True, exclude_none=True)
@@ -678,8 +678,9 @@ class EvaluationDataset:
678
678
 
679
679
  _, link = api.send_request(
680
680
  method=HttpMethods.POST,
681
- endpoint=Endpoints.DATASET_ENDPOINT,
681
+ endpoint=Endpoints.DATASET_ALIAS_ENDPOINT,
682
682
  body=body,
683
+ url_params={"alias": alias},
683
684
  )
684
685
  if link:
685
686
  console = Console()
@@ -711,9 +712,9 @@ class EvaluationDataset:
711
712
  start_time = time.perf_counter()
712
713
  data, _ = api.send_request(
713
714
  method=HttpMethods.GET,
714
- endpoint=Endpoints.DATASET_ENDPOINT,
715
+ endpoint=Endpoints.DATASET_ALIAS_ENDPOINT,
716
+ url_params={"alias": alias},
715
717
  params={
716
- "alias": alias,
717
718
  "finalized": str(finalized).lower(),
718
719
  "public": str(public).lower(),
719
720
  },
@@ -797,7 +798,7 @@ class EvaluationDataset:
797
798
 
798
799
  _, link = api.send_request(
799
800
  method=HttpMethods.POST,
800
- endpoint=Endpoints.DATASET_QUEUE_ENDPOINT,
801
+ endpoint=Endpoints.DATASET_ALIAS_QUEUE_ENDPOINT,
801
802
  body=body,
802
803
  url_params={"alias": alias},
803
804
  )
@@ -808,6 +809,19 @@ class EvaluationDataset:
808
809
  f"[link={link}]{link}[/link]"
809
810
  )
810
811
 
812
+ def delete(
813
+ self,
814
+ alias: str,
815
+ ):
816
+ api = Api()
817
+ api.send_request(
818
+ method=HttpMethods.DELETE,
819
+ endpoint=Endpoints.DATASET_ALIAS_ENDPOINT,
820
+ url_params={"alias": alias},
821
+ )
822
+ console = Console()
823
+ console.print("✅ Dataset successfully deleted from Confident AI!")
824
+
811
825
  def generate_goldens_from_docs(
812
826
  self,
813
827
  document_paths: List[str],
@@ -1112,7 +1126,7 @@ class EvaluationDataset:
1112
1126
 
1113
1127
  if not self.goldens or len(self.goldens) == 0:
1114
1128
  raise ValueError("Unable to evaluate dataset with no goldens.")
1115
-
1129
+ trace_manager.integration_traces_to_evaluate.clear()
1116
1130
  goldens = self.goldens
1117
1131
  with capture_evaluation_run("traceable evaluate()"):
1118
1132
  global_test_run_manager.reset()
@@ -1,7 +1,6 @@
1
1
  from typing import Optional, List, Union, Callable
2
2
  from rich.progress import Progress
3
3
  from pydantic import BaseModel
4
- from dataclasses import asdict
5
4
  import inspect
6
5
  import asyncio
7
6
  import uuid
@@ -465,7 +464,7 @@ class ConversationSimulator:
465
464
  ):
466
465
  if not self.run_remote:
467
466
  conversation_history = json.dumps(
468
- [asdict(t) for t in turns], indent=4
467
+ [t.model_dump() for t in turns], indent=4
469
468
  )
470
469
  prompt = self.template.stop_simulation(
471
470
  conversation_history, golden.expected_outcome
@@ -510,7 +509,7 @@ class ConversationSimulator:
510
509
  ):
511
510
  if not self.run_remote:
512
511
  conversation_history = json.dumps(
513
- [asdict(t) for t in turns], indent=4
512
+ [t.model_dump() for t in turns], indent=4
514
513
  )
515
514
  prompt = self.template.stop_simulation(
516
515
  conversation_history, golden.expected_outcome
@@ -1,4 +1,3 @@
1
- from dataclasses import asdict
2
1
  from typing import List
3
2
  import textwrap
4
3
  import json
@@ -48,7 +47,9 @@ class ConversationSimulatorTemplate:
48
47
  turns: List[Turn],
49
48
  language: str,
50
49
  ) -> str:
51
- previous_conversation = json.dumps([asdict(t) for t in turns], indent=4)
50
+ previous_conversation = json.dumps(
51
+ [t.model_dump() for t in turns], indent=4
52
+ )
52
53
  prompt = textwrap.dedent(
53
54
  f"""
54
55
  Pretend you are a user of an LLM app. Your task is to generate the next user input in {language}
@@ -0,0 +1,192 @@
1
+ from pydantic import (
2
+ BaseModel,
3
+ Field,
4
+ PrivateAttr,
5
+ model_validator,
6
+ AliasChoices,
7
+ )
8
+ from typing import List, Optional, Dict, Literal
9
+ from copy import deepcopy
10
+ from enum import Enum
11
+
12
+ from deepeval.test_case import ToolCall
13
+ from deepeval.test_case.mcp import (
14
+ MCPServer,
15
+ MCPPromptCall,
16
+ MCPResourceCall,
17
+ MCPToolCall,
18
+ validate_mcp_servers,
19
+ )
20
+
21
+
22
+ class TurnParams(Enum):
23
+ ROLE = "role"
24
+ CONTENT = "content"
25
+ SCENARIO = "scenario"
26
+ EXPECTED_OUTCOME = "expected_outcome"
27
+ RETRIEVAL_CONTEXT = "retrieval_context"
28
+ TOOLS_CALLED = "tools_called"
29
+ MCP_TOOLS = "mcp_tools_called"
30
+ MCP_RESOURCES = "mcp_resources_called"
31
+ MCP_PROMPTS = "mcp_prompts_called"
32
+
33
+
34
+ class Turn(BaseModel):
35
+ role: Literal["user", "assistant"]
36
+ content: str
37
+ user_id: Optional[str] = Field(
38
+ default=None, validation_alias=AliasChoices("userId", "user_id")
39
+ )
40
+ retrieval_context: Optional[List[str]] = Field(
41
+ default=None,
42
+ validation_alias=AliasChoices("retrievalContext", "retrieval_context"),
43
+ )
44
+ tools_called: Optional[List[ToolCall]] = Field(
45
+ default=None,
46
+ validation_alias=AliasChoices("toolsCalled", "tools_called"),
47
+ )
48
+ mcp_tools_called: Optional[List[MCPToolCall]] = Field(default=None)
49
+ mcp_resources_called: Optional[List[MCPResourceCall]] = Field(default=None)
50
+ mcp_prompts_called: Optional[List[MCPPromptCall]] = Field(default=None)
51
+ additional_metadata: Optional[Dict] = Field(
52
+ default=None,
53
+ serialization_alias="additionalMetadata",
54
+ validation_alias=AliasChoices(
55
+ "additionalMetadata", "additional_metadata"
56
+ ),
57
+ )
58
+ _mcp_interaction: bool = PrivateAttr(default=False)
59
+
60
+ def __repr__(self):
61
+ attrs = [f"role={self.role!r}", f"content={self.content!r}"]
62
+ if self.user_id is not None:
63
+ attrs.append(f"user_id={self.user_id!r}")
64
+ if self.retrieval_context is not None:
65
+ attrs.append(f"retrieval_context={self.retrieval_context!r}")
66
+ if self.tools_called is not None:
67
+ attrs.append(f"tools_called={self.tools_called!r}")
68
+ if self.mcp_tools_called is not None:
69
+ attrs.append(f"mcp_tools_called={self.mcp_tools_called!r}")
70
+ if self.mcp_resources_called is not None:
71
+ attrs.append(f"mcp_resources_called={self.mcp_resources_called!r}")
72
+ if self.mcp_prompts_called is not None:
73
+ attrs.append(f"mcp_prompts_called={self.mcp_prompts_called!r}")
74
+ if self.additional_metadata is not None:
75
+ attrs.append(f"additional_metadata={self.additional_metadata!r}")
76
+ return f"Turn({', '.join(attrs)})"
77
+
78
+ @model_validator(mode="before")
79
+ def validate_input(cls, data):
80
+ mcp_tools_called = data.get("mcp_tools_called")
81
+ mcp_prompts_called = data.get("mcp_prompts_called")
82
+ mcp_resources_called = data.get("mcp_resources_called")
83
+
84
+ if (
85
+ mcp_tools_called is not None
86
+ or mcp_prompts_called is not None
87
+ or mcp_resources_called is not None
88
+ ):
89
+ from mcp.types import (
90
+ CallToolResult,
91
+ ReadResourceResult,
92
+ GetPromptResult,
93
+ )
94
+
95
+ data["_mcp_interaction"] = True
96
+ if mcp_tools_called is not None:
97
+ if not isinstance(mcp_tools_called, list) or not all(
98
+ isinstance(tool_called, MCPToolCall)
99
+ and isinstance(tool_called.result, CallToolResult)
100
+ for tool_called in mcp_tools_called
101
+ ):
102
+ raise TypeError(
103
+ "The 'tools_called' must be a list of 'MCPToolCall' with result of type 'CallToolResult' from mcp.types"
104
+ )
105
+
106
+ if mcp_resources_called is not None:
107
+ if not isinstance(mcp_resources_called, list) or not all(
108
+ isinstance(resource_called, MCPResourceCall)
109
+ and isinstance(resource_called.result, ReadResourceResult)
110
+ for resource_called in mcp_resources_called
111
+ ):
112
+ raise TypeError(
113
+ "The 'resources_called' must be a list of 'MCPResourceCall' with result of type 'ReadResourceResult' from mcp.types"
114
+ )
115
+
116
+ if mcp_prompts_called is not None:
117
+ if not isinstance(mcp_prompts_called, list) or not all(
118
+ isinstance(prompt_called, MCPPromptCall)
119
+ and isinstance(prompt_called.result, GetPromptResult)
120
+ for prompt_called in mcp_prompts_called
121
+ ):
122
+ raise TypeError(
123
+ "The 'prompts_called' must be a list of 'MCPPromptCall' with result of type 'GetPromptResult' from mcp.types"
124
+ )
125
+
126
+ return data
127
+
128
+
129
+ class ConversationalTestCase(BaseModel):
130
+ turns: List[Turn]
131
+ scenario: Optional[str] = Field(default=None)
132
+ context: Optional[List[str]] = Field(default=None)
133
+ name: Optional[str] = Field(default=None)
134
+ user_description: Optional[str] = Field(
135
+ default=None,
136
+ serialization_alias="userDescription",
137
+ validation_alias=AliasChoices("userDescription", "user_description"),
138
+ )
139
+ expected_outcome: Optional[str] = Field(
140
+ default=None,
141
+ serialization_alias="expectedOutcome",
142
+ validation_alias=AliasChoices("expectedOutcome", "expected_outcome"),
143
+ )
144
+ chatbot_role: Optional[str] = Field(
145
+ default=None,
146
+ serialization_alias="chatbotRole",
147
+ validation_alias=AliasChoices("chatbotRole", "chatbot_role"),
148
+ )
149
+ additional_metadata: Optional[Dict] = Field(
150
+ default=None,
151
+ serialization_alias="additionalMetadata",
152
+ validation_alias=AliasChoices(
153
+ "additionalMetadata", "additional_metadata"
154
+ ),
155
+ )
156
+ comments: Optional[str] = Field(default=None)
157
+ tags: Optional[List[str]] = Field(default=None)
158
+ mcp_servers: Optional[List[MCPServer]] = Field(default=None)
159
+
160
+ _dataset_rank: Optional[int] = PrivateAttr(default=None)
161
+ _dataset_alias: Optional[str] = PrivateAttr(default=None)
162
+ _dataset_id: Optional[str] = PrivateAttr(default=None)
163
+
164
+ @model_validator(mode="before")
165
+ def validate_input(cls, data):
166
+ turns = data.get("turns")
167
+ context = data.get("context")
168
+ mcp_servers = data.get("mcp_servers")
169
+
170
+ if len(turns) == 0:
171
+ raise TypeError("'turns' must not be empty")
172
+
173
+ # Ensure `context` is None or a list of strings
174
+ if context is not None:
175
+ if not isinstance(context, list) or not all(
176
+ isinstance(item, str) for item in context
177
+ ):
178
+ raise TypeError("'context' must be None or a list of strings")
179
+
180
+ if mcp_servers is not None:
181
+ validate_mcp_servers(mcp_servers)
182
+
183
+ copied_turns = []
184
+ for turn in turns:
185
+ if not isinstance(turn, Turn):
186
+ raise TypeError("'turns' must be a list of `Turn`s")
187
+
188
+ copied_turns.append(deepcopy(turn))
189
+
190
+ data["turns"] = copied_turns
191
+
192
+ return data
@@ -1,5 +1,10 @@
1
- from pydantic import Field, BaseModel
2
- from dataclasses import dataclass, field
1
+ from pydantic import (
2
+ Field,
3
+ BaseModel,
4
+ model_validator,
5
+ PrivateAttr,
6
+ AliasChoices,
7
+ )
3
8
  from typing import List, Optional, Dict, Any
4
9
  from enum import Enum
5
10
  import json
@@ -68,7 +73,9 @@ class ToolCall(BaseModel):
68
73
  reasoning: Optional[str] = None
69
74
  output: Optional[Any] = None
70
75
  input_parameters: Optional[Dict[str, Any]] = Field(
71
- None, serialization_alias="inputParameters"
76
+ None,
77
+ serialization_alias="inputParameters",
78
+ validation_alias=AliasChoices("inputParameters", "input_parameters"),
72
79
  )
73
80
 
74
81
  def __eq__(self, other):
@@ -143,120 +150,180 @@ class ToolCall(BaseModel):
143
150
  )
144
151
 
145
152
 
146
- @dataclass
147
- class LLMTestCase:
153
+ class LLMTestCase(BaseModel):
148
154
  input: str
149
- actual_output: Optional[str] = None
150
- expected_output: Optional[str] = None
151
- context: Optional[List[str]] = None
152
- retrieval_context: Optional[List[str]] = None
153
- additional_metadata: Optional[Dict] = None
154
- tools_called: Optional[List[ToolCall]] = None
155
- comments: Optional[str] = None
156
- expected_tools: Optional[List[ToolCall]] = None
157
- token_cost: Optional[float] = None
158
- completion_time: Optional[float] = None
159
- name: Optional[str] = field(default=None)
160
- tags: Optional[List[str]] = field(default=None)
161
- mcp_servers: Optional[List[MCPServer]] = None
162
- mcp_tools_called: Optional[List[MCPToolCall]] = None
163
- mcp_resources_called: Optional[List[MCPResourceCall]] = None
164
- mcp_prompts_called: Optional[List[MCPPromptCall]] = None
165
- _trace_dict: Optional[Dict] = field(default=None, repr=False)
166
- _dataset_rank: Optional[int] = field(default=None, repr=False)
167
- _dataset_alias: Optional[str] = field(default=None, repr=False)
168
- _dataset_id: Optional[str] = field(default=None, repr=False)
169
- _identifier: Optional[str] = field(default=str(uuid.uuid4()), repr=False)
170
-
171
- def __post_init__(self):
172
- if self.input is not None:
173
- if not isinstance(self.input, str):
155
+ actual_output: Optional[str] = Field(
156
+ default=None,
157
+ serialization_alias="actualOutput",
158
+ validation_alias=AliasChoices("actualOutput", "actual_output"),
159
+ )
160
+ expected_output: Optional[str] = Field(
161
+ default=None,
162
+ serialization_alias="expectedOutput",
163
+ validation_alias=AliasChoices("expectedOutput", "expected_output"),
164
+ )
165
+ context: Optional[List[str]] = Field(
166
+ default=None, serialization_alias="context"
167
+ )
168
+ retrieval_context: Optional[List[str]] = Field(
169
+ default=None,
170
+ serialization_alias="retrievalContext",
171
+ validation_alias=AliasChoices("retrievalContext", "retrieval_context"),
172
+ )
173
+ additional_metadata: Optional[Dict] = Field(
174
+ default=None,
175
+ serialization_alias="additionalMetadata",
176
+ validation_alias=AliasChoices(
177
+ "additionalMetadata", "additional_metadata"
178
+ ),
179
+ )
180
+ tools_called: Optional[List[ToolCall]] = Field(
181
+ default=None,
182
+ serialization_alias="toolsCalled",
183
+ validation_alias=AliasChoices("toolsCalled", "tools_called"),
184
+ )
185
+ comments: Optional[str] = Field(
186
+ default=None, serialization_alias="comments"
187
+ )
188
+ expected_tools: Optional[List[ToolCall]] = Field(
189
+ default=None,
190
+ serialization_alias="expectedTools",
191
+ validation_alias=AliasChoices("expectedTools", "expected_tools"),
192
+ )
193
+ token_cost: Optional[float] = Field(
194
+ default=None,
195
+ serialization_alias="tokenCost",
196
+ validation_alias=AliasChoices("tokenCost", "token_cost"),
197
+ )
198
+ completion_time: Optional[float] = Field(
199
+ default=None,
200
+ serialization_alias="completionTime",
201
+ validation_alias=AliasChoices("completionTime", "completion_time"),
202
+ )
203
+ name: Optional[str] = Field(default=None)
204
+ tags: Optional[List[str]] = Field(default=None)
205
+ mcp_servers: Optional[List[MCPServer]] = Field(default=None)
206
+ mcp_tools_called: Optional[List[MCPToolCall]] = Field(
207
+ default=None,
208
+ serialization_alias="mcpToolsCalled",
209
+ )
210
+ mcp_resources_called: Optional[List[MCPResourceCall]] = Field(
211
+ default=None, serialization_alias="mcpResourcesCalled"
212
+ )
213
+ mcp_prompts_called: Optional[List[MCPPromptCall]] = Field(
214
+ default=None, serialization_alias="mcpPromptsCalled"
215
+ )
216
+ _trace_dict: Optional[Dict] = PrivateAttr(default=None)
217
+ _dataset_rank: Optional[int] = PrivateAttr(default=None)
218
+ _dataset_alias: Optional[str] = PrivateAttr(default=None)
219
+ _dataset_id: Optional[str] = PrivateAttr(default=None)
220
+ _identifier: Optional[str] = PrivateAttr(
221
+ default_factory=lambda: str(uuid.uuid4())
222
+ )
223
+
224
+ @model_validator(mode="before")
225
+ def validate_input(cls, data):
226
+ input = data.get("input")
227
+ actual_output = data.get("actual_output")
228
+ context = data.get("context")
229
+ retrieval_context = data.get("retrieval_context")
230
+ tools_called = data.get("tools_called")
231
+ expected_tools = data.get("expected_tools")
232
+ mcp_servers = data.get("mcp_servers")
233
+ mcp_tools_called = data.get("mcp_tools_called")
234
+ mcp_resources_called = data.get("mcp_resources_called")
235
+ mcp_prompts_called = data.get("mcp_prompts_called")
236
+
237
+ if input is not None:
238
+ if not isinstance(input, str):
174
239
  raise TypeError("'input' must be a string")
175
240
 
176
- if self.actual_output is not None:
177
- if not isinstance(self.actual_output, str):
241
+ if actual_output is not None:
242
+ if not isinstance(actual_output, str):
178
243
  raise TypeError("'actual_output' must be a string")
179
244
 
180
245
  # Ensure `context` is None or a list of strings
181
- if self.context is not None:
182
- if not isinstance(self.context, list) or not all(
183
- isinstance(item, str) for item in self.context
246
+ if context is not None:
247
+ if not isinstance(context, list) or not all(
248
+ isinstance(item, str) for item in context
184
249
  ):
185
250
  raise TypeError("'context' must be None or a list of strings")
186
251
 
187
252
  # Ensure `retrieval_context` is None or a list of strings
188
- if self.retrieval_context is not None:
189
- if not isinstance(self.retrieval_context, list) or not all(
190
- isinstance(item, str) for item in self.retrieval_context
253
+ if retrieval_context is not None:
254
+ if not isinstance(retrieval_context, list) or not all(
255
+ isinstance(item, str) for item in retrieval_context
191
256
  ):
192
257
  raise TypeError(
193
258
  "'retrieval_context' must be None or a list of strings"
194
259
  )
195
260
 
196
261
  # Ensure `tools_called` is None or a list of strings
197
- if self.tools_called is not None:
198
- if not isinstance(self.tools_called, list) or not all(
199
- isinstance(item, ToolCall) for item in self.tools_called
262
+ if tools_called is not None:
263
+ if not isinstance(tools_called, list) or not all(
264
+ isinstance(item, ToolCall) for item in tools_called
200
265
  ):
201
266
  raise TypeError(
202
267
  "'tools_called' must be None or a list of `ToolCall`"
203
268
  )
204
269
 
205
270
  # Ensure `expected_tools` is None or a list of strings
206
- if self.expected_tools is not None:
207
- if not isinstance(self.expected_tools, list) or not all(
208
- isinstance(item, ToolCall) for item in self.expected_tools
271
+ if expected_tools is not None:
272
+ if not isinstance(expected_tools, list) or not all(
273
+ isinstance(item, ToolCall) for item in expected_tools
209
274
  ):
210
275
  raise TypeError(
211
276
  "'expected_tools' must be None or a list of `ToolCall`"
212
277
  )
213
278
 
214
279
  # Ensure `mcp_server` is None or a list of `MCPServer`
215
- if self.mcp_servers is not None:
216
- if not isinstance(self.mcp_servers, list) or not all(
217
- isinstance(item, MCPServer) for item in self.mcp_servers
280
+ if mcp_servers is not None:
281
+ if not isinstance(mcp_servers, list) or not all(
282
+ isinstance(item, MCPServer) for item in mcp_servers
218
283
  ):
219
284
  raise TypeError(
220
285
  "'mcp_server' must be None or a list of 'MCPServer'"
221
286
  )
222
287
  else:
223
- validate_mcp_servers(self.mcp_servers)
288
+ validate_mcp_servers(mcp_servers)
224
289
 
225
290
  # Ensure `mcp_tools_called` is None or a list of `MCPToolCall`
226
- if self.mcp_tools_called is not None:
291
+ if mcp_tools_called is not None:
227
292
  from mcp.types import CallToolResult
228
293
 
229
- if not isinstance(self.mcp_tools_called, list) or not all(
294
+ if not isinstance(mcp_tools_called, list) or not all(
230
295
  isinstance(tool_called, MCPToolCall)
231
296
  and isinstance(tool_called.result, CallToolResult)
232
- for tool_called in self.mcp_tools_called
297
+ for tool_called in mcp_tools_called
233
298
  ):
234
299
  raise TypeError(
235
300
  "The 'tools_called' must be a list of 'MCPToolCall' with result of type 'CallToolResult' from mcp.types"
236
301
  )
237
302
 
238
303
  # Ensure `mcp_resources_called` is None or a list of `MCPResourceCall`
239
- if self.mcp_resources_called is not None:
304
+ if mcp_resources_called is not None:
240
305
  from mcp.types import ReadResourceResult
241
306
 
242
- if not isinstance(self.mcp_resources_called, list) or not all(
307
+ if not isinstance(mcp_resources_called, list) or not all(
243
308
  isinstance(resource_called, MCPResourceCall)
244
309
  and isinstance(resource_called.result, ReadResourceResult)
245
- for resource_called in self.mcp_resources_called
310
+ for resource_called in mcp_resources_called
246
311
  ):
247
312
  raise TypeError(
248
313
  "The 'resources_called' must be a list of 'MCPResourceCall' with result of type 'ReadResourceResult' from mcp.types"
249
314
  )
250
315
 
251
316
  # Ensure `mcp_prompts_called` is None or a list of `MCPPromptCall`
252
- if self.mcp_prompts_called is not None:
317
+ if mcp_prompts_called is not None:
253
318
  from mcp.types import GetPromptResult
254
319
 
255
- if not isinstance(self.mcp_prompts_called, list) or not all(
320
+ if not isinstance(mcp_prompts_called, list) or not all(
256
321
  isinstance(prompt_called, MCPPromptCall)
257
322
  and isinstance(prompt_called.result, GetPromptResult)
258
- for prompt_called in self.mcp_prompts_called
323
+ for prompt_called in mcp_prompts_called
259
324
  ):
260
325
  raise TypeError(
261
326
  "The 'prompts_called' must be a list of 'MCPPromptCall' with result of type 'GetPromptResult' from mcp.types"
262
327
  )
328
+
329
+ return data
@@ -4,7 +4,6 @@ from contextvars import ContextVar
4
4
  from deepeval.tracing.types import BaseSpan, Trace
5
5
  from deepeval.test_case.llm_test_case import ToolCall, LLMTestCase
6
6
  from deepeval.tracing.types import LlmSpan, RetrieverSpan
7
- from deepeval.metrics import BaseMetric
8
7
 
9
8
  current_span_context: ContextVar[Optional[BaseSpan]] = ContextVar(
10
9
  "current_span", default=None
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "deepeval"
3
- version = "3.4.3"
3
+ version = "3.4.5"
4
4
  description = "The LLM Evaluation Framework"
5
5
  authors = ["Jeffrey Ip <jeffreyip@confident-ai.com>"]
6
6
  license = "Apache-2.0"
@@ -1 +0,0 @@
1
- __version__: str = "3.4.3"