deepeval 3.5.9__tar.gz → 3.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (473)
  1. {deepeval-3.5.9 → deepeval-3.6.0}/PKG-INFO +1 -1
  2. deepeval-3.6.0/deepeval/_version.py +1 -0
  3. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/config/settings_manager.py +1 -1
  4. deepeval-3.6.0/deepeval/contextvars.py +25 -0
  5. deepeval-3.6.0/deepeval/dataset/__init__.py +11 -0
  6. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/execute.py +15 -3
  7. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai_agents/__init__.py +4 -3
  8. deepeval-3.6.0/deepeval/openai_agents/agent.py +36 -0
  9. deepeval-3.6.0/deepeval/openai_agents/callback_handler.py +135 -0
  10. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai_agents/extractors.py +83 -7
  11. deepeval-3.6.0/deepeval/openai_agents/patch.py +309 -0
  12. deepeval-3.6.0/deepeval/openai_agents/runner.py +348 -0
  13. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/context.py +1 -0
  14. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/tracing.py +3 -0
  15. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/utils.py +4 -3
  16. {deepeval-3.5.9 → deepeval-3.6.0}/pyproject.toml +1 -1
  17. deepeval-3.5.9/deepeval/_version.py +0 -1
  18. deepeval-3.5.9/deepeval/dataset/__init__.py +0 -5
  19. deepeval-3.5.9/deepeval/openai_agents/agent.py +0 -194
  20. deepeval-3.5.9/deepeval/openai_agents/callback_handler.py +0 -134
  21. deepeval-3.5.9/deepeval/openai_agents/patch.py +0 -115
  22. deepeval-3.5.9/deepeval/openai_agents/runner.py +0 -335
  23. {deepeval-3.5.9 → deepeval-3.6.0}/LICENSE.md +0 -0
  24. {deepeval-3.5.9 → deepeval-3.6.0}/README.md +0 -0
  25. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/__init__.py +0 -0
  26. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/annotation/__init__.py +0 -0
  27. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/annotation/annotation.py +0 -0
  28. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/annotation/api.py +0 -0
  29. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/__init__.py +0 -0
  30. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/arc/__init__.py +0 -0
  31. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/arc/arc.py +0 -0
  32. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/arc/mode.py +0 -0
  33. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/arc/template.py +0 -0
  34. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/base_benchmark.py +0 -0
  35. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bbq/__init__.py +0 -0
  36. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bbq/bbq.py +0 -0
  37. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bbq/task.py +0 -0
  38. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bbq/template.py +0 -0
  39. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
  40. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
  41. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
  42. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
  43. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
  44. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
  45. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
  46. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
  47. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
  48. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
  49. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
  50. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
  51. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
  52. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
  53. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
  54. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
  55. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
  56. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
  57. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
  58. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
  59. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
  60. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
  61. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
  62. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
  63. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
  64. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  65. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  66. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  67. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
  68. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
  69. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
  70. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
  71. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
  72. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
  73. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
  74. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
  75. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
  76. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
  77. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
  78. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
  79. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
  80. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
  81. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
  82. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
  83. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
  84. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
  85. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
  86. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
  87. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
  88. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
  89. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
  90. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
  91. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
  92. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  93. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  94. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  95. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
  96. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
  97. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
  98. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
  99. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bool_q/__init__.py +0 -0
  100. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
  101. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bool_q/template.py +0 -0
  102. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/drop/__init__.py +0 -0
  103. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/drop/drop.py +0 -0
  104. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/drop/task.py +0 -0
  105. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/drop/template.py +0 -0
  106. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
  107. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
  108. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
  109. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
  110. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
  111. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
  112. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/gsm8k/template.py +0 -0
  113. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
  114. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
  115. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/task.py +0 -0
  116. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/template.py +0 -0
  117. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/__init__.py +0 -0
  118. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
  119. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/task.py +0 -0
  120. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/template.py +0 -0
  121. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/ifeval/__init__.py +0 -0
  122. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
  123. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/ifeval/template.py +0 -0
  124. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/lambada/__init__.py +0 -0
  125. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/lambada/lambada.py +0 -0
  126. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/lambada/template.py +0 -0
  127. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
  128. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
  129. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/task.py +0 -0
  130. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/template.py +0 -0
  131. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/__init__.py +0 -0
  132. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
  133. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/task.py +0 -0
  134. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/template.py +0 -0
  135. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/__init__.py +0 -0
  136. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
  137. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/task.py +0 -0
  138. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/template.py +0 -0
  139. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/modes/__init__.py +0 -0
  140. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/results.py +0 -0
  141. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/schema.py +0 -0
  142. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/squad/__init__.py +0 -0
  143. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/squad/squad.py +0 -0
  144. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/squad/task.py +0 -0
  145. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/squad/template.py +0 -0
  146. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/tasks/__init__.py +0 -0
  147. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
  148. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
  149. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/task.py +0 -0
  150. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/template.py +0 -0
  151. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
  152. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/utils.py +0 -0
  153. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/winogrande/__init__.py +0 -0
  154. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/winogrande/template.py +0 -0
  155. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
  156. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/__init__.py +0 -0
  157. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/dotenv_handler.py +0 -0
  158. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/main.py +0 -0
  159. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/server.py +0 -0
  160. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/test.py +0 -0
  161. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/types.py +0 -0
  162. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/utils.py +0 -0
  163. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/confident/__init__.py +0 -0
  164. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/confident/api.py +0 -0
  165. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/confident/types.py +0 -0
  166. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/config/__init__.py +0 -0
  167. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/config/settings.py +0 -0
  168. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/config/utils.py +0 -0
  169. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/constants.py +0 -0
  170. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/api.py +0 -0
  171. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/dataset.py +0 -0
  172. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/golden.py +0 -0
  173. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/test_run_tracer.py +0 -0
  174. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/types.py +0 -0
  175. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/utils.py +0 -0
  176. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/errors.py +0 -0
  177. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/__init__.py +0 -0
  178. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/api.py +0 -0
  179. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/compare.py +0 -0
  180. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/configs.py +0 -0
  181. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/evaluate.py +0 -0
  182. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/types.py +0 -0
  183. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/utils.py +0 -0
  184. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/__init__.py +0 -0
  185. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/crewai/__init__.py +0 -0
  186. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/crewai/agent.py +0 -0
  187. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/crewai/handler.py +0 -0
  188. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/crewai/patch.py +0 -0
  189. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/hugging_face/__init__.py +0 -0
  190. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/hugging_face/callback.py +0 -0
  191. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
  192. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
  193. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/hugging_face/utils.py +0 -0
  194. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/langchain/__init__.py +0 -0
  195. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/langchain/callback.py +0 -0
  196. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/langchain/patch.py +0 -0
  197. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/langchain/utils.py +0 -0
  198. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/llama_index/__init__.py +0 -0
  199. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/llama_index/agent/patched.py +0 -0
  200. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/llama_index/handler.py +0 -0
  201. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/llama_index/utils.py +0 -0
  202. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
  203. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/pydantic_ai/agent.py +0 -0
  204. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/pydantic_ai/instrumentator.py +0 -0
  205. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/pydantic_ai/otel.py +0 -0
  206. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/key_handler.py +0 -0
  207. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/__init__.py +0 -0
  208. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
  209. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
  210. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/schema.py +0 -0
  211. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/template.py +0 -0
  212. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
  213. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
  214. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/schema.py +0 -0
  215. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/template.py +0 -0
  216. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/utils.py +0 -0
  217. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/__init__.py +0 -0
  218. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
  219. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/schema.py +0 -0
  220. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/template.py +0 -0
  221. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/base_metric.py +0 -0
  222. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/bias/__init__.py +0 -0
  223. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/bias/bias.py +0 -0
  224. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/bias/schema.py +0 -0
  225. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/bias/template.py +0 -0
  226. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/__init__.py +0 -0
  227. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
  228. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/schema.py +0 -0
  229. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/template.py +0 -0
  230. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/__init__.py +0 -0
  231. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
  232. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/schema.py +0 -0
  233. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/template.py +0 -0
  234. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
  235. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
  236. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
  237. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/template.py +0 -0
  238. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
  239. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
  240. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/schema.py +0 -0
  241. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/template.py +0 -0
  242. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/__init__.py +0 -0
  243. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
  244. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/nodes.py +0 -0
  245. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/templates.py +0 -0
  246. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
  247. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
  248. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
  249. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/template.py +0 -0
  250. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/__init__.py +0 -0
  251. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/dag.py +0 -0
  252. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/graph.py +0 -0
  253. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/nodes.py +0 -0
  254. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/schema.py +0 -0
  255. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/templates.py +0 -0
  256. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/utils.py +0 -0
  257. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/faithfulness/__init__.py +0 -0
  258. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
  259. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/faithfulness/schema.py +0 -0
  260. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/faithfulness/template.py +0 -0
  261. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/g_eval/__init__.py +0 -0
  262. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/g_eval/g_eval.py +0 -0
  263. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/g_eval/schema.py +0 -0
  264. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/g_eval/template.py +0 -0
  265. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/g_eval/utils.py +0 -0
  266. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/hallucination/__init__.py +0 -0
  267. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/hallucination/hallucination.py +0 -0
  268. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/hallucination/schema.py +0 -0
  269. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/hallucination/template.py +0 -0
  270. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/indicator.py +0 -0
  271. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/json_correctness/__init__.py +0 -0
  272. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
  273. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/json_correctness/schema.py +0 -0
  274. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/json_correctness/template.py +0 -0
  275. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
  276. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
  277. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/schema.py +0 -0
  278. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/template.py +0 -0
  279. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp/__init__.py +0 -0
  280. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
  281. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
  282. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp/schema.py +0 -0
  283. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp/template.py +0 -0
  284. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
  285. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
  286. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
  287. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/template.py +0 -0
  288. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/misuse/__init__.py +0 -0
  289. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/misuse/misuse.py +0 -0
  290. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/misuse/schema.py +0 -0
  291. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/misuse/template.py +0 -0
  292. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
  293. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
  294. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
  295. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
  296. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
  297. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
  298. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
  299. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
  300. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
  301. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
  302. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
  303. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
  304. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
  305. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
  306. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
  307. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
  308. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
  309. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
  310. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
  311. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
  312. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -0
  313. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
  314. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
  315. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
  316. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -0
  317. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
  318. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
  319. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
  320. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -0
  321. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
  322. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
  323. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
  324. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -0
  325. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  326. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
  327. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
  328. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -0
  329. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  330. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -0
  331. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
  332. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -0
  333. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
  334. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  335. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
  336. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
  337. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
  338. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
  339. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
  340. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/non_advice/__init__.py +0 -0
  341. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/non_advice/non_advice.py +0 -0
  342. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/non_advice/schema.py +0 -0
  343. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/non_advice/template.py +0 -0
  344. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/__init__.py +0 -0
  345. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
  346. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/schema.py +0 -0
  347. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/template.py +0 -0
  348. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
  349. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
  350. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/schema.py +0 -0
  351. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/template.py +0 -0
  352. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/ragas.py +0 -0
  353. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_adherence/__init__.py +0 -0
  354. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
  355. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_adherence/schema.py +0 -0
  356. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_adherence/template.py +0 -0
  357. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_violation/__init__.py +0 -0
  358. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_violation/role_violation.py +0 -0
  359. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_violation/schema.py +0 -0
  360. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_violation/template.py +0 -0
  361. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/summarization/__init__.py +0 -0
  362. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/summarization/schema.py +0 -0
  363. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/summarization/summarization.py +0 -0
  364. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/summarization/template.py +0 -0
  365. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/task_completion/__init__.py +0 -0
  366. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/task_completion/schema.py +0 -0
  367. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/task_completion/task_completion.py +0 -0
  368. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/task_completion/template.py +0 -0
  369. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/tool_correctness/__init__.py +0 -0
  370. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
  371. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/toxicity/__init__.py +0 -0
  372. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/toxicity/schema.py +0 -0
  373. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/toxicity/template.py +0 -0
  374. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/toxicity/toxicity.py +0 -0
  375. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
  376. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/schema.py +0 -0
  377. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/template.py +0 -0
  378. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
  379. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/utils.py +0 -0
  380. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/__init__.py +0 -0
  381. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/_summac_model.py +0 -0
  382. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/answer_relevancy_model.py +0 -0
  383. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/base_model.py +0 -0
  384. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/detoxify_model.py +0 -0
  385. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/embedding_models/__init__.py +0 -0
  386. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
  387. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
  388. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
  389. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
  390. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/hallucination_model.py +0 -0
  391. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/__init__.py +0 -0
  392. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
  393. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/anthropic_model.py +0 -0
  394. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/azure_model.py +0 -0
  395. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/deepseek_model.py +0 -0
  396. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/gemini_model.py +0 -0
  397. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/grok_model.py +0 -0
  398. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/kimi_model.py +0 -0
  399. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/litellm_model.py +0 -0
  400. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/local_model.py +0 -0
  401. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/ollama_model.py +0 -0
  402. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/openai_model.py +0 -0
  403. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/utils.py +0 -0
  404. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/mlllms/__init__.py +0 -0
  405. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/mlllms/gemini_model.py +0 -0
  406. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/mlllms/ollama_model.py +0 -0
  407. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/mlllms/openai_model.py +0 -0
  408. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/retry_policy.py +0 -0
  409. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/summac_model.py +0 -0
  410. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/unbias_model.py +0 -0
  411. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/utils.py +0 -0
  412. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai/__init__.py +0 -0
  413. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai/extractors.py +0 -0
  414. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai/patch.py +0 -0
  415. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai/utils.py +0 -0
  416. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/plugins/__init__.py +0 -0
  417. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/plugins/plugin.py +0 -0
  418. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/progress_context.py +0 -0
  419. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/prompt/__init__.py +0 -0
  420. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/prompt/api.py +0 -0
  421. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/prompt/prompt.py +0 -0
  422. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/prompt/utils.py +0 -0
  423. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/py.typed +0 -0
  424. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/red_teaming/README.md +0 -0
  425. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/scorer/__init__.py +0 -0
  426. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/scorer/scorer.py +0 -0
  427. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/simulator/__init__.py +0 -0
  428. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/simulator/conversation_simulator.py +0 -0
  429. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/simulator/schema.py +0 -0
  430. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/simulator/template.py +0 -0
  431. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/singleton.py +0 -0
  432. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/__init__.py +0 -0
  433. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/base_synthesizer.py +0 -0
  434. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/chunking/__init__.py +0 -0
  435. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/chunking/context_generator.py +0 -0
  436. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
  437. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/config.py +0 -0
  438. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/schema.py +0 -0
  439. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/synthesizer.py +0 -0
  440. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/templates/__init__.py +0 -0
  441. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/templates/template.py +0 -0
  442. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/templates/template_extraction.py +0 -0
  443. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/templates/template_prompt.py +0 -0
  444. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/types.py +0 -0
  445. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/utils.py +0 -0
  446. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/telemetry.py +0 -0
  447. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/__init__.py +0 -0
  448. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/arena_test_case.py +0 -0
  449. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/conversational_test_case.py +0 -0
  450. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/llm_test_case.py +0 -0
  451. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/mcp.py +0 -0
  452. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/mllm_test_case.py +0 -0
  453. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/utils.py +0 -0
  454. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/__init__.py +0 -0
  455. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/api.py +0 -0
  456. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/cache.py +0 -0
  457. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/hooks.py +0 -0
  458. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/hyperparameters.py +0 -0
  459. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/test_run.py +0 -0
  460. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/__init__.py +0 -0
  461. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/api.py +0 -0
  462. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/offline_evals/__init__.py +0 -0
  463. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/offline_evals/api.py +0 -0
  464. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/offline_evals/span.py +0 -0
  465. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/offline_evals/thread.py +0 -0
  466. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/offline_evals/trace.py +0 -0
  467. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/otel/__init__.py +0 -0
  468. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/otel/exporter.py +0 -0
  469. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/otel/utils.py +0 -0
  470. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/patchers.py +0 -0
  471. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/perf_epoch_bridge.py +0 -0
  472. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/types.py +0 -0
  473. {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.5.9
3
+ Version: 3.6.0
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -0,0 +1 @@
1
+ __version__: str = "3.6.0"
@@ -15,7 +15,7 @@ from enum import Enum
15
15
  from pydantic import SecretStr
16
16
  from deepeval.config.settings import get_settings, _SAVE_RE
17
17
  from deepeval.cli.dotenv_handler import DotenvHandler
18
- from deepeval.utils import bool_to_env_str
18
+ from deepeval.config.utils import bool_to_env_str
19
19
 
20
20
  logger = logging.getLogger(__name__)
21
21
  StrOrEnum = Union[str, Enum]
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ from contextvars import ContextVar
4
+ from typing import TYPE_CHECKING, Optional
5
+
6
+
7
+ if TYPE_CHECKING:
8
+ from deepeval.dataset.golden import Golden
9
+
10
+
11
+ CURRENT_GOLDEN: ContextVar[Optional[Golden]] = ContextVar(
12
+ "CURRENT_GOLDEN", default=None
13
+ )
14
+
15
+
16
+ def set_current_golden(golden: Optional[Golden]):
17
+ return CURRENT_GOLDEN.set(golden)
18
+
19
+
20
+ def get_current_golden() -> Optional[Golden]:
21
+ return CURRENT_GOLDEN.get()
22
+
23
+
24
+ def reset_current_golden(token) -> None:
25
+ CURRENT_GOLDEN.reset(token)
@@ -0,0 +1,11 @@
1
+ from deepeval.contextvars import get_current_golden
2
+ from .dataset import EvaluationDataset
3
+ from .golden import Golden, ConversationalGolden
4
+
5
+
6
+ __all__ = [
7
+ "EvaluationDataset",
8
+ "Golden",
9
+ "ConversationalGolden",
10
+ "get_current_golden",
11
+ ]
@@ -42,6 +42,7 @@ from deepeval.tracing.api import (
42
42
  BaseApiSpan,
43
43
  )
44
44
  from deepeval.dataset import Golden
45
+ from deepeval.contextvars import set_current_golden, reset_current_golden
45
46
  from deepeval.errors import MissingTestCaseParamsError
46
47
  from deepeval.metrics.utils import copy_metrics
47
48
  from deepeval.utils import (
@@ -1480,6 +1481,7 @@ def execute_agentic_test_cases_from_loop(
1480
1481
  )
1481
1482
 
1482
1483
  for golden in goldens:
1484
+ token = set_current_golden(golden)
1483
1485
  with capture_evaluation_run("golden"):
1484
1486
  # yield golden
1485
1487
  count += 1
@@ -1492,8 +1494,14 @@ def execute_agentic_test_cases_from_loop(
1492
1494
  _progress=progress,
1493
1495
  _pbar_callback_id=pbar_tags_id,
1494
1496
  ):
1495
- yield golden
1496
- current_trace: Trace = current_trace_context.get()
1497
+ try:
1498
+ # yield golden to user code
1499
+ yield golden
1500
+ # control has returned from user code without error, capture trace now
1501
+ current_trace: Trace = current_trace_context.get()
1502
+ finally:
1503
+ # after user code returns control, always reset the context
1504
+ reset_current_golden(token)
1497
1505
 
1498
1506
  update_pbar(progress, pbar_tags_id)
1499
1507
  update_pbar(progress, pbar_id)
@@ -1849,6 +1857,7 @@ def a_execute_agentic_test_cases_from_loop(
1849
1857
 
1850
1858
  try:
1851
1859
  for index, golden in enumerate(goldens):
1860
+ token = set_current_golden(golden)
1852
1861
  current_golden_ctx.update(
1853
1862
  {
1854
1863
  "index": index,
@@ -1857,7 +1866,10 @@ def a_execute_agentic_test_cases_from_loop(
1857
1866
  }
1858
1867
  )
1859
1868
  prev_task_length = len(created_tasks)
1860
- yield golden
1869
+ try:
1870
+ yield golden
1871
+ finally:
1872
+ reset_current_golden(token)
1861
1873
  # if this golden created no tasks, bump bars now
1862
1874
  if len(created_tasks) == prev_task_length:
1863
1875
  update_pbar(progress, pbar_callback_id)
@@ -1,6 +1,7 @@
1
1
  from deepeval.openai_agents.callback_handler import DeepEvalTracingProcessor
2
- from deepeval.openai_agents.runner import Runner
3
- from deepeval.openai_agents.patch import function_tool
4
2
  from deepeval.openai_agents.agent import DeepEvalAgent as Agent
3
+ from deepeval.openai_agents.patch import function_tool
4
+
5
+ # from deepeval.openai_agents.runner import Runner
5
6
 
6
- __all__ = ["DeepEvalTracingProcessor", "Runner", "function_tool", "Agent"]
7
+ __all__ = ["DeepEvalTracingProcessor", "Agent", "function_tool"]
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Generic, TypeVar, List
5
+
6
+ from deepeval.prompt import Prompt
7
+ from deepeval.metrics import BaseMetric
8
+ from deepeval.tracing.types import LlmSpan
9
+
10
+ try:
11
+ from agents.agent import Agent as BaseAgent
12
+ from deepeval.openai_agents.patch import (
13
+ patch_default_agent_runner_get_model,
14
+ )
15
+ except Exception as e:
16
+ raise RuntimeError(
17
+ "openai-agents is required for this integration. Please install it."
18
+ ) from e
19
+
20
+ TContext = TypeVar("TContext")
21
+
22
+
23
+ @dataclass
24
+ class DeepEvalAgent(BaseAgent[TContext], Generic[TContext]):
25
+ """
26
+ A subclass of agents.Agent.
27
+ """
28
+
29
+ llm_metric_collection: str = None
30
+ llm_metrics: List[BaseMetric] = None
31
+ confident_prompt: Prompt = None
32
+ agent_metrics: List[BaseMetric] = None
33
+ agent_metric_collection: str = None
34
+
35
+ def __post_init__(self):
36
+ patch_default_agent_runner_get_model()
@@ -0,0 +1,135 @@
1
+ from deepeval.tracing.tracing import (
2
+ Observer,
3
+ current_span_context,
4
+ trace_manager,
5
+ )
6
+ from deepeval.openai_agents.extractors import *
7
+ from deepeval.tracing.context import current_trace_context
8
+ from deepeval.tracing.utils import make_json_serializable
9
+ from time import perf_counter
10
+ from deepeval.tracing.types import TraceSpanStatus
11
+
12
+ try:
13
+ from agents.tracing import Span, Trace, TracingProcessor
14
+ from agents.tracing.span_data import (
15
+ AgentSpanData,
16
+ CustomSpanData,
17
+ FunctionSpanData,
18
+ GenerationSpanData,
19
+ GuardrailSpanData,
20
+ HandoffSpanData,
21
+ ResponseSpanData,
22
+ SpanData,
23
+ )
24
+ from deepeval.openai_agents.patch import (
25
+ patch_default_agent_run_single_turn,
26
+ patch_default_agent_run_single_turn_streamed,
27
+ )
28
+
29
+ openai_agents_available = True
30
+ except ImportError:
31
+ openai_agents_available = False
32
+
33
+
34
+ def _check_openai_agents_available():
35
+ if not openai_agents_available:
36
+ raise ImportError(
37
+ "openai-agents is required for this integration. Install it via your package manager"
38
+ )
39
+
40
+
41
+ class DeepEvalTracingProcessor(TracingProcessor):
42
+ def __init__(self) -> None:
43
+ _check_openai_agents_available()
44
+ patch_default_agent_run_single_turn()
45
+ patch_default_agent_run_single_turn_streamed()
46
+ self.span_observers: dict[str, Observer] = {}
47
+
48
+ def on_trace_start(self, trace: "Trace") -> None:
49
+ trace_dict = trace.export()
50
+ _trace_uuid = trace_dict.get("id")
51
+ _thread_id = trace_dict.get("group_id")
52
+ _trace_name = trace_dict.get("workflow_name")
53
+ _trace_metadata = trace_dict.get("metadata")
54
+
55
+ _trace = trace_manager.start_new_trace(trace_uuid=str(_trace_uuid))
56
+ _trace.thread_id = str(_thread_id)
57
+ _trace.name = str(_trace_name)
58
+ _trace.metadata = make_json_serializable(_trace_metadata)
59
+ current_trace_context.set(_trace)
60
+
61
+ trace_manager.add_span( # adds a dummy root span
62
+ BaseSpan(
63
+ uuid=_trace_uuid,
64
+ trace_uuid=_trace_uuid,
65
+ parent_uuid=None,
66
+ start_time=perf_counter(),
67
+ name=_trace_name,
68
+ status=TraceSpanStatus.IN_PROGRESS,
69
+ children=[],
70
+ )
71
+ )
72
+
73
+ def on_trace_end(self, trace: "Trace") -> None:
74
+ trace_dict = trace.export()
75
+ _trace_uuid = trace_dict.get("id")
76
+ _trace_name = trace_dict.get("workflow_name")
77
+
78
+ trace_manager.remove_span(_trace_uuid) # removing the dummy root span
79
+ trace_manager.end_trace(_trace_uuid)
80
+ current_trace_context.set(None)
81
+
82
+ def on_span_start(self, span: "Span") -> None:
83
+ if not span.started_at:
84
+ return
85
+ current_span = current_span_context.get()
86
+ if current_span and isinstance(current_span, LlmSpan):
87
+ return
88
+
89
+ span_type = self.get_span_kind(span.span_data)
90
+ observer = Observer(span_type=span_type, func_name="NA")
91
+ if span_type == "llm":
92
+ observer.observe_kwargs["model"] = "temporary model"
93
+ observer.update_span_properties = (
94
+ lambda span_type: update_span_properties(span_type, span.span_data)
95
+ )
96
+ self.span_observers[span.span_id] = observer
97
+ observer.__enter__()
98
+
99
+ def on_span_end(self, span: "Span") -> None:
100
+ update_trace_properties_from_span_data(
101
+ current_trace_context.get(), span.span_data
102
+ )
103
+
104
+ current_span = current_span_context.get()
105
+ if current_span and isinstance(current_span, LlmSpan):
106
+ update_span_properties(current_span, span.span_data)
107
+ return
108
+ observer = self.span_observers.pop(span.span_id, None)
109
+ if observer:
110
+ observer.__exit__(None, None, None)
111
+
112
+ def force_flush(self) -> None:
113
+ pass
114
+
115
+ def shutdown(self) -> None:
116
+ pass
117
+
118
+ def get_span_kind(self, span_data: "SpanData") -> str:
119
+ if isinstance(span_data, AgentSpanData):
120
+ return "agent"
121
+ if isinstance(span_data, FunctionSpanData):
122
+ return "tool"
123
+ if isinstance(span_data, MCPListToolsSpanData):
124
+ return "tool"
125
+ if isinstance(span_data, GenerationSpanData):
126
+ return "llm"
127
+ if isinstance(span_data, ResponseSpanData):
128
+ return "llm"
129
+ if isinstance(span_data, HandoffSpanData):
130
+ return "custom"
131
+ if isinstance(span_data, CustomSpanData):
132
+ return "base"
133
+ if isinstance(span_data, GuardrailSpanData):
134
+ return "base"
135
+ return "base"
@@ -1,9 +1,10 @@
1
+ from deepeval.tracing.types import Trace
1
2
  from openai.types.responses.response_input_item_param import (
2
3
  FunctionCallOutput,
3
4
  Message,
4
5
  )
5
6
  from openai.types.responses.response_output_message_param import Content
6
- from typing import Union, List
7
+ from typing import Union, List, Optional
7
8
  from openai.types.responses import (
8
9
  ResponseFunctionToolCallParam,
9
10
  ResponseOutputMessageParam,
@@ -25,6 +26,8 @@ from deepeval.tracing.types import (
25
26
  )
26
27
  import json
27
28
 
29
+ from deepeval.tracing.utils import make_json_serializable
30
+
28
31
  try:
29
32
  from agents import MCPListToolsSpanData
30
33
  from agents.tracing.span_data import (
@@ -89,13 +92,17 @@ def update_span_properties_from_response_span_data(
89
92
  return
90
93
  # Extract usage tokens
91
94
  usage = response.usage
95
+ cached_input_tokens = None
96
+ ouptut_reasoning_tokens = None
92
97
  if usage:
93
98
  output_tokens = usage.output_tokens
94
99
  input_tokens = usage.input_tokens
95
100
  cached_input_tokens = usage.input_tokens_details.cached_tokens
96
101
  ouptut_reasoning_tokens = usage.output_tokens_details.reasoning_tokens
97
102
  # Get input and output
98
- input = parse_response_input(span_data.input)
103
+ input = parse_response_input(
104
+ span_data.input, span_data.response.instructions
105
+ )
99
106
  raw_output = parse_response_output(response.output)
100
107
  output = (
101
108
  raw_output if isinstance(raw_output, str) else json.dumps(raw_output)
@@ -112,6 +119,23 @@ def update_span_properties_from_response_span_data(
112
119
  span.input = input
113
120
  span.output = output
114
121
  span.name = "LLM Generation"
122
+ response_dict = response.model_dump(exclude_none=True, mode="json")
123
+ span.metadata["invocation_params"] = {
124
+ k: v
125
+ for k, v in response_dict.items()
126
+ if k
127
+ in (
128
+ "max_output_tokens",
129
+ "parallel_tool_calls",
130
+ "reasoning",
131
+ "temperature",
132
+ "text",
133
+ "tool_choice",
134
+ "tools",
135
+ "top_p",
136
+ "truncation",
137
+ )
138
+ }
115
139
 
116
140
 
117
141
  def update_span_properties_from_generation_span_data(
@@ -136,6 +160,11 @@ def update_span_properties_from_generation_span_data(
136
160
  span.input = input
137
161
  span.output = output
138
162
  span.name = "LLM Generation"
163
+ span.metadata["invocation_params"] = {
164
+ "model_config": make_json_serializable(
165
+ generation_span_data.model_config
166
+ ),
167
+ }
139
168
 
140
169
 
141
170
  ########################################################
@@ -191,8 +220,6 @@ def update_span_properties_from_agent_span_data(
191
220
  if agent_span_data.output_type:
192
221
  metadata["output_type"] = agent_span_data.output_type
193
222
  span.metadata = metadata
194
- span.input = None
195
- span.output = None
196
223
 
197
224
 
198
225
  ########################################################
@@ -238,10 +265,30 @@ def update_span_properties_from_guardrail_span_data(
238
265
  ########################################################
239
266
 
240
267
 
241
- def parse_response_input(input: Union[str, List[ResponseInputItemParam]]):
242
- if isinstance(input, str):
243
- return input
268
+ def parse_response_input(
269
+ input: Union[str, List[ResponseInputItemParam]],
270
+ instructions: Optional[Union[str, List[ResponseInputItemParam]]] = None,
271
+ ):
272
+
244
273
  processed_input = []
274
+
275
+ if isinstance(input, str) and isinstance(instructions, str):
276
+ return [
277
+ {"type": "message", "role": "system", "content": instructions},
278
+ {"type": "message", "role": "user", "content": input},
279
+ ]
280
+ elif isinstance(input, list) and isinstance(instructions, list):
281
+ input = instructions + input
282
+ elif isinstance(input, list) and isinstance(instructions, str):
283
+ processed_input += [
284
+ {"type": "message", "role": "system", "content": instructions}
285
+ ]
286
+ elif isinstance(input, str) and isinstance(instructions, list):
287
+ processed_input += [
288
+ {"type": "message", "role": "user", "content": input}
289
+ ]
290
+ input = instructions
291
+
245
292
  for item in input:
246
293
  if "type" not in item:
247
294
  if "role" in item and "content" in item:
@@ -365,3 +412,32 @@ def parse_function_call(
365
412
  "name": function_call.name,
366
413
  "arguments": function_call.arguments,
367
414
  }
415
+
416
+
417
+ def update_trace_properties_from_span_data(
418
+ trace: Trace,
419
+ span_data: Union["ResponseSpanData", "GenerationSpanData"],
420
+ ):
421
+ if isinstance(span_data, ResponseSpanData):
422
+ if not trace.input:
423
+ trace.input = parse_response_input(
424
+ span_data.input, span_data.response.instructions
425
+ )
426
+ raw_output = parse_response_output(span_data.response.output)
427
+ output = (
428
+ raw_output
429
+ if isinstance(raw_output, str)
430
+ else json.dumps(raw_output)
431
+ )
432
+ trace.output = output
433
+
434
+ elif isinstance(span_data, GenerationSpanData):
435
+ if not trace.input:
436
+ trace.input = span_data.input
437
+ raw_output = span_data.output
438
+ output = (
439
+ raw_output
440
+ if isinstance(raw_output, str)
441
+ else json.dumps(raw_output)
442
+ )
443
+ trace.output = output