deepeval 3.4.6__tar.gz → 3.4.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (457)
  1. {deepeval-3.4.6 → deepeval-3.4.7}/PKG-INFO +26 -1
  2. {deepeval-3.4.6 → deepeval-3.4.7}/README.md +24 -0
  3. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/__init__.py +7 -2
  4. deepeval-3.4.7/deepeval/_version.py +1 -0
  5. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/main.py +1 -1
  6. deepeval-3.4.7/deepeval/env.py +35 -0
  7. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/utils.py +7 -1
  8. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/langchain/__init__.py +1 -0
  9. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/langchain/callback.py +4 -0
  10. deepeval-3.4.7/deepeval/integrations/langchain/patch.py +32 -0
  11. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/pydantic_ai/agent.py +91 -1
  12. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/pydantic_ai/setup.py +0 -1
  13. deepeval-3.4.7/deepeval/openai_agents/__init__.py +6 -0
  14. deepeval-3.4.7/deepeval/openai_agents/agent.py +184 -0
  15. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai_agents/callback_handler.py +28 -21
  16. deepeval-3.4.7/deepeval/openai_agents/patch.py +115 -0
  17. deepeval-3.4.7/deepeval/openai_agents/runner.py +114 -0
  18. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/telemetry.py +2 -2
  19. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/api.py +11 -0
  20. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/context.py +5 -0
  21. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/tracing.py +20 -0
  22. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/types.py +2 -0
  23. {deepeval-3.4.6 → deepeval-3.4.7}/pyproject.toml +3 -2
  24. deepeval-3.4.6/deepeval/_version.py +0 -1
  25. deepeval-3.4.6/deepeval/openai_agents/__init__.py +0 -4
  26. {deepeval-3.4.6 → deepeval-3.4.7}/LICENSE.md +0 -0
  27. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/annotation/__init__.py +0 -0
  28. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/annotation/annotation.py +0 -0
  29. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/annotation/api.py +0 -0
  30. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/__init__.py +0 -0
  31. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/arc/__init__.py +0 -0
  32. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/arc/arc.py +0 -0
  33. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/arc/mode.py +0 -0
  34. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/arc/template.py +0 -0
  35. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/base_benchmark.py +0 -0
  36. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bbq/__init__.py +0 -0
  37. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bbq/bbq.py +0 -0
  38. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bbq/task.py +0 -0
  39. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bbq/template.py +0 -0
  40. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
  41. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
  42. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
  43. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
  44. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
  45. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
  46. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
  47. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
  48. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
  49. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
  50. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
  51. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
  52. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
  53. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
  54. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
  55. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
  56. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
  57. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
  58. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
  59. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
  60. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
  61. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
  62. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
  63. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
  64. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
  65. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  66. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  67. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  68. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
  69. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
  70. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
  71. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
  72. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
  73. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
  74. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
  75. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
  76. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
  77. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
  78. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
  79. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
  80. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
  81. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
  82. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
  83. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
  84. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
  85. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
  86. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
  87. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
  88. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
  89. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
  90. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
  91. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
  92. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
  93. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  94. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  95. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  96. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
  97. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
  98. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
  99. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
  100. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bool_q/__init__.py +0 -0
  101. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
  102. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bool_q/template.py +0 -0
  103. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/drop/__init__.py +0 -0
  104. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/drop/drop.py +0 -0
  105. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/drop/task.py +0 -0
  106. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/drop/template.py +0 -0
  107. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
  108. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
  109. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
  110. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
  111. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
  112. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
  113. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/gsm8k/template.py +0 -0
  114. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
  115. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
  116. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/hellaswag/task.py +0 -0
  117. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/hellaswag/template.py +0 -0
  118. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/human_eval/__init__.py +0 -0
  119. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
  120. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/human_eval/task.py +0 -0
  121. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/human_eval/template.py +0 -0
  122. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/ifeval/__init__.py +0 -0
  123. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
  124. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/ifeval/template.py +0 -0
  125. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/lambada/__init__.py +0 -0
  126. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/lambada/lambada.py +0 -0
  127. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/lambada/template.py +0 -0
  128. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
  129. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
  130. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/logi_qa/task.py +0 -0
  131. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/logi_qa/template.py +0 -0
  132. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/math_qa/__init__.py +0 -0
  133. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
  134. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/math_qa/task.py +0 -0
  135. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/math_qa/template.py +0 -0
  136. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/mmlu/__init__.py +0 -0
  137. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
  138. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/mmlu/task.py +0 -0
  139. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/mmlu/template.py +0 -0
  140. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/modes/__init__.py +0 -0
  141. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/results.py +0 -0
  142. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/schema.py +0 -0
  143. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/squad/__init__.py +0 -0
  144. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/squad/squad.py +0 -0
  145. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/squad/task.py +0 -0
  146. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/squad/template.py +0 -0
  147. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/tasks/__init__.py +0 -0
  148. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
  149. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
  150. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/truthful_qa/task.py +0 -0
  151. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/truthful_qa/template.py +0 -0
  152. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
  153. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/utils.py +0 -0
  154. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/winogrande/__init__.py +0 -0
  155. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/winogrande/template.py +0 -0
  156. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
  157. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/__init__.py +0 -0
  158. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/server.py +0 -0
  159. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/test.py +0 -0
  160. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/types.py +0 -0
  161. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/utils.py +0 -0
  162. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/confident/__init__.py +0 -0
  163. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/confident/api.py +0 -0
  164. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/confident/types.py +0 -0
  165. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/constants.py +0 -0
  166. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/__init__.py +0 -0
  167. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/api.py +0 -0
  168. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/dataset.py +0 -0
  169. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/golden.py +0 -0
  170. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/types.py +0 -0
  171. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/utils.py +0 -0
  172. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/errors.py +0 -0
  173. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/__init__.py +0 -0
  174. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/api.py +0 -0
  175. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/compare.py +0 -0
  176. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/configs.py +0 -0
  177. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/evaluate.py +0 -0
  178. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/execute.py +0 -0
  179. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/types.py +0 -0
  180. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/__init__.py +0 -0
  181. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/crewai/__init__.py +0 -0
  182. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/crewai/agent.py +0 -0
  183. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/crewai/handler.py +0 -0
  184. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/crewai/patch.py +0 -0
  185. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/hugging_face/__init__.py +0 -0
  186. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/hugging_face/callback.py +0 -0
  187. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
  188. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
  189. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/hugging_face/utils.py +0 -0
  190. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/langchain/utils.py +0 -0
  191. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/llama_index/__init__.py +0 -0
  192. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/llama_index/agent/patched.py +0 -0
  193. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/llama_index/handler.py +0 -0
  194. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/llama_index/utils.py +0 -0
  195. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
  196. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/key_handler.py +0 -0
  197. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/__init__.py +0 -0
  198. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
  199. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
  200. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/answer_relevancy/schema.py +0 -0
  201. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/answer_relevancy/template.py +0 -0
  202. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
  203. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
  204. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/arena_g_eval/schema.py +0 -0
  205. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/arena_g_eval/template.py +0 -0
  206. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/arena_g_eval/utils.py +0 -0
  207. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/argument_correctness/__init__.py +0 -0
  208. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
  209. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/argument_correctness/schema.py +0 -0
  210. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/argument_correctness/template.py +0 -0
  211. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/base_metric.py +0 -0
  212. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/bias/__init__.py +0 -0
  213. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/bias/bias.py +0 -0
  214. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/bias/schema.py +0 -0
  215. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/bias/template.py +0 -0
  216. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_precision/__init__.py +0 -0
  217. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
  218. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_precision/schema.py +0 -0
  219. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_precision/template.py +0 -0
  220. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_recall/__init__.py +0 -0
  221. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
  222. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_recall/schema.py +0 -0
  223. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_recall/template.py +0 -0
  224. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
  225. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
  226. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
  227. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_relevancy/template.py +0 -0
  228. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
  229. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
  230. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversation_completeness/schema.py +0 -0
  231. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversation_completeness/template.py +0 -0
  232. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
  233. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
  234. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
  235. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversational_g_eval/template.py +0 -0
  236. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/__init__.py +0 -0
  237. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/dag.py +0 -0
  238. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/graph.py +0 -0
  239. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/nodes.py +0 -0
  240. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/schema.py +0 -0
  241. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/templates.py +0 -0
  242. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/utils.py +0 -0
  243. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/faithfulness/__init__.py +0 -0
  244. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
  245. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/faithfulness/schema.py +0 -0
  246. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/faithfulness/template.py +0 -0
  247. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/g_eval/__init__.py +0 -0
  248. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/g_eval/g_eval.py +0 -0
  249. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/g_eval/schema.py +0 -0
  250. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/g_eval/template.py +0 -0
  251. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/g_eval/utils.py +0 -0
  252. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/hallucination/__init__.py +0 -0
  253. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/hallucination/hallucination.py +0 -0
  254. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/hallucination/schema.py +0 -0
  255. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/hallucination/template.py +0 -0
  256. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/indicator.py +0 -0
  257. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/json_correctness/__init__.py +0 -0
  258. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
  259. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/json_correctness/schema.py +0 -0
  260. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/json_correctness/template.py +0 -0
  261. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
  262. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
  263. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/knowledge_retention/schema.py +0 -0
  264. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/knowledge_retention/template.py +0 -0
  265. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp/__init__.py +0 -0
  266. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
  267. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
  268. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp/schema.py +0 -0
  269. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp/template.py +0 -0
  270. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
  271. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
  272. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
  273. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp_use_metric/template.py +0 -0
  274. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/misuse/__init__.py +0 -0
  275. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/misuse/misuse.py +0 -0
  276. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/misuse/schema.py +0 -0
  277. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/misuse/template.py +0 -0
  278. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
  279. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
  280. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
  281. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
  282. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
  283. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
  284. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
  285. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
  286. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
  287. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
  288. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
  289. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
  290. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
  291. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
  292. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
  293. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
  294. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
  295. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
  296. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
  297. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
  298. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -0
  299. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
  300. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
  301. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
  302. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -0
  303. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
  304. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
  305. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
  306. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -0
  307. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
  308. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
  309. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
  310. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -0
  311. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  312. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
  313. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
  314. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -0
  315. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  316. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -0
  317. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
  318. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -0
  319. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
  320. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  321. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
  322. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
  323. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
  324. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
  325. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
  326. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/non_advice/__init__.py +0 -0
  327. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/non_advice/non_advice.py +0 -0
  328. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/non_advice/schema.py +0 -0
  329. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/non_advice/template.py +0 -0
  330. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/pii_leakage/__init__.py +0 -0
  331. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
  332. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/pii_leakage/schema.py +0 -0
  333. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/pii_leakage/template.py +0 -0
  334. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
  335. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
  336. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/prompt_alignment/schema.py +0 -0
  337. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/prompt_alignment/template.py +0 -0
  338. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/ragas.py +0 -0
  339. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_adherence/__init__.py +0 -0
  340. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
  341. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_adherence/schema.py +0 -0
  342. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_adherence/template.py +0 -0
  343. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_violation/__init__.py +0 -0
  344. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_violation/role_violation.py +0 -0
  345. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_violation/schema.py +0 -0
  346. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_violation/template.py +0 -0
  347. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/summarization/__init__.py +0 -0
  348. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/summarization/schema.py +0 -0
  349. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/summarization/summarization.py +0 -0
  350. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/summarization/template.py +0 -0
  351. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/task_completion/__init__.py +0 -0
  352. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/task_completion/schema.py +0 -0
  353. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/task_completion/task_completion.py +0 -0
  354. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/task_completion/template.py +0 -0
  355. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/tool_correctness/__init__.py +0 -0
  356. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
  357. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/toxicity/__init__.py +0 -0
  358. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/toxicity/schema.py +0 -0
  359. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/toxicity/template.py +0 -0
  360. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/toxicity/toxicity.py +0 -0
  361. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
  362. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/turn_relevancy/schema.py +0 -0
  363. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/turn_relevancy/template.py +0 -0
  364. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
  365. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/utils.py +0 -0
  366. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/__init__.py +0 -0
  367. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/_summac_model.py +0 -0
  368. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/answer_relevancy_model.py +0 -0
  369. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/base_model.py +0 -0
  370. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/detoxify_model.py +0 -0
  371. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/embedding_models/__init__.py +0 -0
  372. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
  373. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
  374. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
  375. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
  376. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/hallucination_model.py +0 -0
  377. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/__init__.py +0 -0
  378. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
  379. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/anthropic_model.py +0 -0
  380. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/azure_model.py +0 -0
  381. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/deepseek_model.py +0 -0
  382. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/gemini_model.py +0 -0
  383. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/grok_model.py +0 -0
  384. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/kimi_model.py +0 -0
  385. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/litellm_model.py +0 -0
  386. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/local_model.py +0 -0
  387. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/ollama_model.py +0 -0
  388. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/openai_model.py +0 -0
  389. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/utils.py +0 -0
  390. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/mlllms/__init__.py +0 -0
  391. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/mlllms/gemini_model.py +0 -0
  392. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/mlllms/ollama_model.py +0 -0
  393. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/mlllms/openai_model.py +0 -0
  394. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/summac_model.py +0 -0
  395. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/unbias_model.py +0 -0
  396. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/utils.py +0 -0
  397. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai/__init__.py +0 -0
  398. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai/extractors.py +0 -0
  399. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai/patch.py +0 -0
  400. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai/utils.py +0 -0
  401. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai_agents/extractors.py +0 -0
  402. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/plugins/__init__.py +0 -0
  403. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/plugins/plugin.py +0 -0
  404. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/progress_context.py +0 -0
  405. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/prompt/__init__.py +0 -0
  406. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/prompt/api.py +0 -0
  407. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/prompt/prompt.py +0 -0
  408. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/prompt/utils.py +0 -0
  409. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/py.typed +0 -0
  410. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/red_teaming/README.md +0 -0
  411. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/scorer/__init__.py +0 -0
  412. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/scorer/scorer.py +0 -0
  413. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/simulator/__init__.py +0 -0
  414. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/simulator/conversation_simulator.py +0 -0
  415. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/simulator/schema.py +0 -0
  416. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/simulator/template.py +0 -0
  417. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/singleton.py +0 -0
  418. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/__init__.py +0 -0
  419. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/base_synthesizer.py +0 -0
  420. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/chunking/__init__.py +0 -0
  421. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/chunking/context_generator.py +0 -0
  422. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
  423. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/config.py +0 -0
  424. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/schema.py +0 -0
  425. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/synthesizer.py +0 -0
  426. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/templates/__init__.py +0 -0
  427. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/templates/template.py +0 -0
  428. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/templates/template_extraction.py +0 -0
  429. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/templates/template_prompt.py +0 -0
  430. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/types.py +0 -0
  431. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/utils.py +0 -0
  432. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/__init__.py +0 -0
  433. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/arena_test_case.py +0 -0
  434. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/conversational_test_case.py +0 -0
  435. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/llm_test_case.py +0 -0
  436. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/mcp.py +0 -0
  437. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/mllm_test_case.py +0 -0
  438. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/utils.py +0 -0
  439. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/__init__.py +0 -0
  440. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/api.py +0 -0
  441. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/cache.py +0 -0
  442. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/hooks.py +0 -0
  443. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/hyperparameters.py +0 -0
  444. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/test_run.py +0 -0
  445. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/__init__.py +0 -0
  446. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/offline_evals/__init__.py +0 -0
  447. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/offline_evals/api.py +0 -0
  448. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/offline_evals/span.py +0 -0
  449. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/offline_evals/thread.py +0 -0
  450. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/offline_evals/trace.py +0 -0
  451. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/otel/__init__.py +0 -0
  452. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/otel/exporter.py +0 -0
  453. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/otel/utils.py +0 -0
  454. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/patchers.py +0 -0
  455. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/perf_epoch_bridge.py +0 -0
  456. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/utils.py +0 -0
  457. {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.4.6
3
+ Version: 3.4.7
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -31,6 +31,7 @@ Requires-Dist: pytest-asyncio
31
31
  Requires-Dist: pytest-repeat
32
32
  Requires-Dist: pytest-rerunfailures (>=12.0,<13.0)
33
33
  Requires-Dist: pytest-xdist
34
+ Requires-Dist: python-dotenv (>=1.1.1,<2.0.0)
34
35
  Requires-Dist: requests (>=2.31.0,<3.0.0)
35
36
  Requires-Dist: rich (>=13.6.0,<15.0.0)
36
37
  Requires-Dist: sentry-sdk
@@ -186,6 +187,16 @@ Let's pretend your LLM application is a RAG based customer support chatbot; here
186
187
  ```
187
188
  pip install -U deepeval
188
189
  ```
190
+ ### Environment variables (.env / .env.local)
191
+
192
+ DeepEval auto-loads `.env.local` then `.env` from the current working directory **at import time**.
193
+ **Precedence:** process env -> `.env.local` -> `.env`.
194
+ Opt out with `DEEPEVAL_DISABLE_DOTENV=1`.
195
+
196
+ ```bash
197
+ cp .env.example .env.local
198
+ # then edit .env.local (ignored by git)
199
+ ```
189
200
 
190
201
  ## Create an account (highly recommended)
191
202
 
@@ -411,6 +422,20 @@ You should see a link displayed in the CLI once the test has finished running. P
411
422
 
412
423
  <br />
413
424
 
425
+ ## Configuration
426
+
427
+ ### Environment variables via .env files
428
+
429
+ Using `.env.local` or `.env` is optional. If they are missing, DeepEval uses your existing environment variables. When present, dotenv environment variables are auto-loaded at import time (unless you set `DEEPEVAL_DISABLE_DOTENV=1`).
430
+
431
+ **Precedence:** process env -> `.env.local` -> `.env`
432
+
433
+ ```bash
434
+ cp .env.example .env.local
435
+ # then edit .env.local (ignored by git)
436
+
437
+ <br />
438
+
414
439
  # Contributing
415
440
 
416
441
  Please read [CONTRIBUTING.md](https://github.com/confident-ai/deepeval/blob/main/CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us.
@@ -140,6 +140,16 @@ Let's pretend your LLM application is a RAG based customer support chatbot; here
140
140
  ```
141
141
  pip install -U deepeval
142
142
  ```
143
+ ### Environment variables (.env / .env.local)
144
+
145
+ DeepEval auto-loads `.env.local` then `.env` from the current working directory **at import time**.
146
+ **Precedence:** process env -> `.env.local` -> `.env`.
147
+ Opt out with `DEEPEVAL_DISABLE_DOTENV=1`.
148
+
149
+ ```bash
150
+ cp .env.example .env.local
151
+ # then edit .env.local (ignored by git)
152
+ ```
143
153
 
144
154
  ## Create an account (highly recommended)
145
155
 
@@ -365,6 +375,20 @@ You should see a link displayed in the CLI once the test has finished running. P
365
375
 
366
376
  <br />
367
377
 
378
+ ## Configuration
379
+
380
+ ### Environment variables via .env files
381
+
382
+ Using `.env.local` or `.env` is optional. If they are missing, DeepEval uses your existing environment variables. When present, dotenv environment variables are auto-loaded at import time (unless you set `DEEPEVAL_DISABLE_DOTENV=1`).
383
+
384
+ **Precedence:** process env -> `.env.local` -> `.env`
385
+
386
+ ```bash
387
+ cp .env.example .env.local
388
+ # then edit .env.local (ignored by git)
389
+
390
+ <br />
391
+
368
392
  # Contributing
369
393
 
370
394
  Please read [CONTRIBUTING.md](https://github.com/confident-ai/deepeval/blob/main/CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us.
@@ -2,6 +2,11 @@ import os
2
2
  import warnings
3
3
  import re
4
4
 
5
+ # load environment variables before other imports
6
+ from .env import autoload_dotenv as _autoload_dotenv
7
+
8
+ _autoload_dotenv()
9
+
5
10
  # Optionally add telemetry
6
11
  from ._version import __version__
7
12
 
@@ -11,7 +16,7 @@ from deepeval.test_run import on_test_run_end, log_hyperparameters
11
16
  from deepeval.utils import login
12
17
  from deepeval.telemetry import *
13
18
 
14
- if os.getenv("DEEPEVAL_GRPC_LOGGING") != "YES":
19
+ if os.getenv("DEEPEVAL_GRPC_LOGGING") != "1":
15
20
  os.environ["GRPC_VERBOSITY"] = "ERROR"
16
21
  os.environ["GRPC_TRACE"] = ""
17
22
 
@@ -61,7 +66,7 @@ def check_for_update():
61
66
 
62
67
 
63
68
  def update_warning_opt_in():
64
- return os.getenv("DEEPEVAL_UPDATE_WARNING_OPT_IN") == "YES"
69
+ return os.getenv("DEEPEVAL_UPDATE_WARNING_OPT_IN") == "1"
65
70
 
66
71
 
67
72
  def is_read_only_env():
@@ -0,0 +1 @@
1
+ __version__: str = "3.4.7"
@@ -154,7 +154,7 @@ def view():
154
154
 
155
155
  @app.command(name="enable-grpc-logging")
156
156
  def enable_grpc_logging():
157
- os.environ["DEEPEVAL_GRPC_LOGGING"] = "YES"
157
+ os.environ["DEEPEVAL_GRPC_LOGGING"] = "1"
158
158
 
159
159
 
160
160
  #############################################
@@ -0,0 +1,35 @@
1
+ from __future__ import annotations
2
+ import os
3
+
4
+ try:
5
+ from dotenv import load_dotenv, find_dotenv # type: ignore
6
+ except Exception:
7
+ load_dotenv = None
8
+ find_dotenv = None
9
+
10
+
11
+ def autoload_dotenv() -> None:
12
+ """
13
+ Autoload environment variables for DeepEval at import time.
14
+
15
+ Precedence from highest -> lowest:
16
+ 1) Existing process environment variables
17
+ 2) .env.local (from current working directory)
18
+ 3) .env (from current working directory)
19
+
20
+ Behavior:
21
+ - Loads .env.local then .env if present, without overriding existing vars.
22
+ - Opt-out by setting DEEPEVAL_DISABLE_DOTENV=1.
23
+ - Soft-fails cleanly if python-dotenv is not installed.
24
+ """
25
+ if os.getenv("DEEPEVAL_DISABLE_DOTENV") == "1":
26
+ return
27
+
28
+ if not (load_dotenv and find_dotenv):
29
+ return
30
+
31
+ for name in (".env.local", ".env"):
32
+ path = find_dotenv(name, usecwd=True)
33
+ if path:
34
+ # Don't override previously set values
35
+ load_dotenv(path, override=False)
@@ -24,8 +24,9 @@ from deepeval.test_run import (
24
24
  MetricData,
25
25
  )
26
26
  from deepeval.evaluate.types import TestResult
27
- from deepeval.tracing.api import TraceApi, BaseApiSpan
27
+ from deepeval.tracing.api import TraceApi, BaseApiSpan, TraceSpanApiStatus
28
28
  from deepeval.tracing.tracing import BaseSpan, Trace
29
+ from deepeval.tracing.types import TraceSpanStatus
29
30
  from deepeval.constants import PYTEST_RUN_TEST_NAME
30
31
  from deepeval.tracing.utils import (
31
32
  perf_counter_to_datetime,
@@ -247,6 +248,11 @@ def create_api_trace(trace: Trace, golden: Golden) -> TraceApi:
247
248
  tools_called=trace.tools_called,
248
249
  expected_tools=trace.expected_tools,
249
250
  metadata=golden.additional_metadata,
251
+ status=(
252
+ TraceSpanApiStatus.SUCCESS
253
+ if trace.status == TraceSpanStatus.SUCCESS
254
+ else TraceSpanApiStatus.ERRORED
255
+ ),
250
256
  )
251
257
 
252
258
 
@@ -1,4 +1,5 @@
1
1
  from .callback import CallbackHandler
2
+ from .patch import tool
2
3
 
3
4
 
4
5
  __all__ = ["CallbackHandler"]
@@ -248,6 +248,8 @@ class CallbackHandler(BaseCallbackHandler):
248
248
  model=model,
249
249
  # fallback for on_end callback
250
250
  end_time=perf_counter(),
251
+ metric_collection=metadata.get("metric_collection", None),
252
+ metrics=metadata.get("metrics", None),
251
253
  )
252
254
 
253
255
  self.add_span_to_trace(llm_span)
@@ -348,6 +350,8 @@ class CallbackHandler(BaseCallbackHandler):
348
350
  ),
349
351
  # fallback for on_end callback
350
352
  end_time=perf_counter(),
353
+ metric_collection=metadata.get("metric_collection", None),
354
+ metrics=metadata.get("metrics", None),
351
355
  )
352
356
  self.add_span_to_trace(tool_span)
353
357
 
@@ -0,0 +1,32 @@
1
+ from langchain_core.tools import tool as original_tool, BaseTool
2
+ from deepeval.metrics import BaseMetric
3
+ from typing import List, Optional, Callable, Any
4
+ from functools import wraps
5
+
6
+
7
+ def tool(
8
+ *args,
9
+ metrics: Optional[List[BaseMetric]] = None,
10
+ metric_collection: Optional[str] = None,
11
+ **kwargs
12
+ ):
13
+ """
14
+ Patched version of langchain_core.tools.tool that prints inputs and outputs
15
+ """
16
+
17
+ # original_tool returns a decorator function, so we need to return a decorator
18
+ def decorator(func: Callable) -> BaseTool:
19
+
20
+ # Apply the original tool decorator to get the BaseTool
21
+ tool_instance = original_tool(*args, **kwargs)(func)
22
+
23
+ if isinstance(tool_instance, BaseTool):
24
+ if tool_instance.metadata is None:
25
+ tool_instance.metadata = {}
26
+
27
+ tool_instance.metadata["metric_collection"] = metric_collection
28
+ tool_instance.metadata["metrics"] = metrics
29
+
30
+ return tool_instance
31
+
32
+ return decorator
@@ -1,6 +1,6 @@
1
1
  from deepeval.telemetry import capture_tracing_integration
2
2
  from deepeval.metrics import BaseMetric
3
- from typing import List
3
+ from typing import List, Optional
4
4
  import functools
5
5
  import inspect
6
6
  import json
@@ -8,6 +8,7 @@ from deepeval.test_case import LLMTestCase
8
8
  from deepeval.tracing.types import TestCaseMetricPair
9
9
  from deepeval.tracing.tracing import trace_manager
10
10
  from deepeval.tracing.otel.utils import parse_string, parse_list_of_strings
11
+ from opentelemetry import trace
11
12
 
12
13
  try:
13
14
  from opentelemetry.trace import NoOpTracer
@@ -63,6 +64,95 @@ class PydanticAIAgent(Agent):
63
64
  # Patch the run method only for this instance
64
65
  self._patch_run_method()
65
66
  self._patch_run_method_sync()
67
+ self._patch_tool_decorator()
68
+
69
+ def _patch_tool_decorator(self):
70
+ """Patch the tool decorator to print input and output"""
71
+ original_tool = self.tool
72
+
73
+ @functools.wraps(original_tool)
74
+ def patched_tool(
75
+ *args,
76
+ metric_collection: Optional[str] = None,
77
+ metrics: Optional[List[BaseMetric]] = None,
78
+ **kwargs
79
+ ):
80
+
81
+ # Check if function is in args (direct decoration: @agent.tool)
82
+ if args and callable(args[0]):
83
+ original_func = args[0]
84
+ patched_func = self._create_patched_function(
85
+ original_func, metric_collection, metrics
86
+ )
87
+ new_args = (patched_func,) + args[1:]
88
+ result = original_tool(*new_args, **kwargs)
89
+ return result
90
+ else:
91
+ # Decorator called with parameters: @agent.tool(metric_collection="...")
92
+ # Return a decorator that will receive the function
93
+ def decorator_with_params(func):
94
+ patched_func = self._create_patched_function(
95
+ func, metric_collection, metrics
96
+ )
97
+ return original_tool(patched_func, **kwargs)
98
+
99
+ return decorator_with_params
100
+
101
+ # Replace the tool method for this instance
102
+ self.tool = patched_tool
103
+
104
+ def _create_patched_function(
105
+ self, original_func, metric_collection, metrics
106
+ ):
107
+ """Create a patched version of the function that adds tracing"""
108
+ if inspect.iscoroutinefunction(original_func):
109
+
110
+ @functools.wraps(original_func)
111
+ async def patched_async_func(*func_args, **func_kwargs):
112
+ result = await original_func(*func_args, **func_kwargs)
113
+
114
+ current_span = trace.get_current_span()
115
+ if current_span.is_recording():
116
+ try:
117
+ result_str = str(result)
118
+ except Exception:
119
+ result_str = ""
120
+ current_span.set_attribute(
121
+ "confident.span.output", result_str
122
+ )
123
+ if metric_collection:
124
+ current_span.set_attribute(
125
+ "confident.span.metric_collection",
126
+ metric_collection,
127
+ )
128
+ # TODO: add metrics in component level evals
129
+ return result
130
+
131
+ return patched_async_func
132
+ else:
133
+
134
+ @functools.wraps(original_func)
135
+ def patched_sync_func(*func_args, **func_kwargs):
136
+ result = original_func(*func_args, **func_kwargs)
137
+
138
+ current_span = trace.get_current_span()
139
+ if current_span.is_recording():
140
+ try:
141
+ result_str = str(result)
142
+ except Exception:
143
+ result_str = ""
144
+ current_span.set_attribute(
145
+ "confident.span.output", result_str
146
+ )
147
+ if metric_collection:
148
+ current_span.set_attribute(
149
+ "confident.span.metric_collection",
150
+ metric_collection,
151
+ )
152
+ # TODO: add metrics in component level evals
153
+ return result
154
+
155
+ return patched_sync_func
66
156
 
67
157
  def _patch_run_method(self):
68
158
  """Patch the Agent.run method only for this PydanticAIAgent instance"""
@@ -49,7 +49,6 @@ def instrument_pydantic_ai(api_key: Optional[str] = None):
49
49
  )
50
50
  )
51
51
  )
52
- trace.set_tracer_provider(tracer_provider)
53
52
 
54
53
  # create an instrumented exporter
55
54
  from pydantic_ai.models.instrumented import InstrumentationSettings
@@ -0,0 +1,6 @@
1
+ from deepeval.openai_agents.callback_handler import DeepEvalTracingProcessor
2
+ from deepeval.openai_agents.runner import Runner
3
+ from deepeval.openai_agents.patch import function_tool
4
+ from deepeval.openai_agents.agent import DeepEvalAgent as Agent
5
+
6
+ __all__ = ["DeepEvalTracingProcessor", "Runner", "function_tool", "Agent"]
@@ -0,0 +1,184 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field, replace
4
+ from typing import Any, Optional, Awaitable, Callable
5
+
6
+ from deepeval.tracing import observe
7
+ from deepeval.prompt import Prompt
8
+
9
+ try:
10
+ from agents.agent import Agent as BaseAgent
11
+ from agents.models.interface import Model, ModelProvider
12
+ except Exception as e:
13
+ raise RuntimeError(
14
+ "openai-agents is required for this integration. Please install it."
15
+ ) from e
16
+
17
+
18
+ class _ObservedModel(Model):
19
+ def __init__(
20
+ self,
21
+ inner: Model,
22
+ *,
23
+ metrics: Optional[list[Any]] = None,
24
+ metric_collection: Optional[str] = None,
25
+ deepeval_prompt: Optional[Any] = None,
26
+ ) -> None:
27
+ self._inner = inner
28
+ self._metrics = metrics
29
+ self._metric_collection = metric_collection
30
+ self._deepeval_prompt = deepeval_prompt
31
+
32
+ # Delegate attributes not overridden
33
+ def __getattr__(self, name: str) -> Any:
34
+ return getattr(self._inner, name)
35
+
36
+ def _get_model_name(self) -> str:
37
+ try:
38
+ for attr in ("model", "model_name", "name"):
39
+ if hasattr(self._inner, attr):
40
+ val = getattr(self._inner, attr)
41
+ if val is not None:
42
+ return str(val)
43
+ except Exception:
44
+ pass
45
+ return "unknown"
46
+
47
+ async def get_response(
48
+ self,
49
+ system_instructions,
50
+ input,
51
+ model_settings,
52
+ tools,
53
+ output_schema,
54
+ handoffs,
55
+ tracing,
56
+ *,
57
+ previous_response_id,
58
+ conversation_id,
59
+ prompt,
60
+ ):
61
+ model_name = self._get_model_name()
62
+
63
+ wrapped = observe(
64
+ metrics=self._metrics,
65
+ metric_collection=self._metric_collection,
66
+ type="llm",
67
+ model=model_name,
68
+ prompt=self._deepeval_prompt,
69
+ )(self._inner.get_response)
70
+
71
+ return await wrapped(
72
+ system_instructions,
73
+ input,
74
+ model_settings,
75
+ tools,
76
+ output_schema,
77
+ handoffs,
78
+ tracing,
79
+ previous_response_id=previous_response_id,
80
+ conversation_id=conversation_id,
81
+ prompt=prompt,
82
+ )
83
+
84
+ def stream_response(
85
+ self,
86
+ system_instructions,
87
+ input,
88
+ model_settings,
89
+ tools,
90
+ output_schema,
91
+ handoffs,
92
+ tracing,
93
+ *,
94
+ previous_response_id,
95
+ conversation_id,
96
+ prompt,
97
+ ):
98
+ # Optional: if you also want to observe streaming, uncomment and wrap similarly.
99
+ # wrapped = observe(
100
+ # metrics=self._metrics,
101
+ # metric_collection=self._metric_collection,
102
+ # type="llm",
103
+ # model=model_name,
104
+ # )(self._inner.stream_response)
105
+ # return wrapped(
106
+ # system_instructions,
107
+ # input,
108
+ # model_settings,
109
+ # tools,
110
+ # output_schema,
111
+ # handoffs,
112
+ # tracing,
113
+ # previous_response_id=previous_response_id,
114
+ # conversation_id=conversation_id,
115
+ # prompt=prompt,
116
+ # )
117
+ return self._inner.stream_response(
118
+ system_instructions,
119
+ input,
120
+ model_settings,
121
+ tools,
122
+ output_schema,
123
+ handoffs,
124
+ tracing,
125
+ previous_response_id=previous_response_id,
126
+ conversation_id=conversation_id,
127
+ prompt=prompt,
128
+ )
129
+
130
+
131
+ class _ObservedProvider(ModelProvider):
132
+ def __init__(
133
+ self,
134
+ base: ModelProvider,
135
+ *,
136
+ metrics: Optional[list[Any]] = None,
137
+ metric_collection: Optional[str] = None,
138
+ deepeval_prompt: Optional[Any] = None,
139
+ ) -> None:
140
+ self._base = base
141
+ self._metrics = metrics
142
+ self._metric_collection = metric_collection
143
+ self._deepeval_prompt = deepeval_prompt
144
+
145
+ def get_model(self, model_name: str | None) -> Model:
146
+ model = self._base.get_model(model_name)
147
+ return _ObservedModel(
148
+ model,
149
+ metrics=self._metrics,
150
+ metric_collection=self._metric_collection,
151
+ deepeval_prompt=self._deepeval_prompt,
152
+ )
153
+
154
+
155
+ @dataclass
156
+ class DeepEvalAgent(BaseAgent[Any]):
157
+ """
158
+ A subclass of agents.Agent that accepts `metrics` and `metric_collection`
159
+ and ensures the underlying model's `get_response` is wrapped with deepeval.observe.
160
+ """
161
+
162
+ metrics: list[Any] | None = field(default=None)
163
+ metric_collection: str | None = field(default=None)
164
+ deepeval_prompt: Prompt | None = field(default=None)
165
+
166
+ def __post_init__(self):
167
+ super().__post_init__()
168
+ # If a direct Model instance is set on the agent, wrap it here.
169
+ if self.model is not None and not isinstance(self.model, str):
170
+ try:
171
+ from agents.models.interface import (
172
+ Model as _Model,
173
+ ) # local import for safety
174
+
175
+ if isinstance(self.model, _Model):
176
+ self.model = _ObservedModel(
177
+ self.model,
178
+ metrics=self.metrics,
179
+ metric_collection=self.metric_collection,
180
+ deepeval_prompt=self.deepeval_prompt,
181
+ )
182
+ except Exception:
183
+ # If we can't import or wrap, silently skip.
184
+ pass
@@ -1,9 +1,9 @@
1
1
  from deepeval.tracing.tracing import (
2
2
  Observer,
3
- SpanType,
4
- current_trace_context,
3
+ current_span_context,
5
4
  )
6
5
  from deepeval.openai_agents.extractors import *
6
+ from deepeval.tracing.context import current_trace_context
7
7
 
8
8
  try:
9
9
  from agents.tracing import Span, Trace, TracingProcessor
@@ -37,34 +37,41 @@ class DeepEvalTracingProcessor(TracingProcessor):
37
37
  self.span_observers: dict[str, Observer] = {}
38
38
 
39
39
  def on_trace_start(self, trace: "Trace") -> None:
40
- observer = Observer(span_type=SpanType.AGENT, func_name=trace.name)
41
- self.root_span_observers[trace.trace_id] = observer
42
- observer.__enter__()
40
+ pass
43
41
 
44
42
  def on_trace_end(self, trace: "Trace") -> None:
45
- # set thread id if exists
46
- current_trace = current_trace_context.get()
47
- thread_id = getattr(trace, "group_id", None)
48
- current_trace.thread_id = thread_id
49
-
50
- observer = self.root_span_observers.pop(trace.trace_id, None)
51
- if observer:
52
- observer.__exit__(None, None, None)
43
+ pass
53
44
 
54
45
  def on_span_start(self, span: "Span") -> None:
55
46
  if not span.started_at:
56
47
  return
57
48
  span_type = self.get_span_kind(span.span_data)
58
- observer = Observer(span_type=span_type, func_name="NA")
59
- if span_type == "llm":
60
- observer.observe_kwargs["model"] = "temporary model"
61
- observer.update_span_properties = (
62
- lambda span_type: update_span_properties(span_type, span.span_data)
63
- )
64
- self.span_observers[span.span_id] = observer
65
- observer.__enter__()
49
+ if span_type == "agent":
50
+ if isinstance(span.span_data, AgentSpanData):
51
+ current_trace = current_trace_context.get()
52
+ if current_trace:
53
+ current_trace.name = span.span_data.name
54
+
55
+ if span_type == "tool":
56
+ return
57
+ elif span_type == "llm":
58
+ return
59
+ else:
60
+ observer = Observer(span_type=span_type, func_name="NA")
61
+ observer.update_span_properties = (
62
+ lambda base_span: update_span_properties(
63
+ base_span, span.span_data
64
+ )
65
+ )
66
+ self.span_observers[span.span_id] = observer
67
+ observer.__enter__()
66
68
 
67
69
  def on_span_end(self, span: "Span") -> None:
70
+ span_type = self.get_span_kind(span.span_data)
71
+ if span_type == "llm":
72
+ current_span = current_span_context.get()
73
+ if current_span:
74
+ update_span_properties(current_span, span.span_data)
68
75
  observer = self.span_observers.pop(span.span_id, None)
69
76
  if observer:
70
77
  observer.__exit__(None, None, None)