deepeval 3.8.1__tar.gz → 3.8.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. {deepeval-3.8.1 → deepeval-3.8.3}/PKG-INFO +1 -1
  2. deepeval-3.8.3/deepeval/_version.py +1 -0
  3. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/confident/api.py +31 -3
  4. deepeval-3.8.3/deepeval/integrations/crewai/__init__.py +16 -0
  5. deepeval-3.8.3/deepeval/integrations/crewai/handler.py +427 -0
  6. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/crewai/subs.py +23 -10
  7. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/crewai/tool.py +20 -3
  8. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/crewai/wrapper.py +69 -15
  9. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/langchain/callback.py +310 -14
  10. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/langchain/utils.py +75 -24
  11. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/llama_index/handler.py +69 -21
  12. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/pydantic_ai/instrumentator.py +50 -14
  13. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/pydantic_ai/otel.py +9 -0
  14. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/utils.py +11 -0
  15. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/simulator/conversation_simulator.py +4 -2
  16. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/telemetry.py +12 -91
  17. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/api.py +1 -0
  18. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/context.py +3 -0
  19. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/trace_context.py +5 -0
  20. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/tracing.py +7 -5
  21. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/types.py +1 -0
  22. {deepeval-3.8.1 → deepeval-3.8.3}/pyproject.toml +11 -6
  23. deepeval-3.8.1/deepeval/_version.py +0 -1
  24. deepeval-3.8.1/deepeval/integrations/crewai/__init__.py +0 -9
  25. deepeval-3.8.1/deepeval/integrations/crewai/handler.py +0 -232
  26. {deepeval-3.8.1 → deepeval-3.8.3}/LICENSE.md +0 -0
  27. {deepeval-3.8.1 → deepeval-3.8.3}/README.md +0 -0
  28. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/__init__.py +0 -0
  29. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/annotation/__init__.py +0 -0
  30. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/annotation/annotation.py +0 -0
  31. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/annotation/api.py +0 -0
  32. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/anthropic/__init__.py +0 -0
  33. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/anthropic/extractors.py +0 -0
  34. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/anthropic/patch.py +0 -0
  35. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/anthropic/utils.py +0 -0
  36. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/__init__.py +0 -0
  37. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/arc/__init__.py +0 -0
  38. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/arc/arc.py +0 -0
  39. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/arc/mode.py +0 -0
  40. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/arc/template.py +0 -0
  41. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/base_benchmark.py +0 -0
  42. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/bbq/__init__.py +0 -0
  43. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/bbq/bbq.py +0 -0
  44. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/bbq/task.py +0 -0
  45. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/bbq/template.py +0 -0
  46. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
  47. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
  48. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
  49. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
  50. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
  51. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
  52. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
  53. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
  54. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
  55. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
  56. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
  57. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
  58. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
  59. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
  60. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
  61. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
  62. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
  63. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
  64. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
  65. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
  66. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
  67. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
  68. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
  69. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
  70. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
  71. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  72. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  73. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  74. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
  75. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
  76. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
  77. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
  78. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
  79. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
  80. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
  81. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
  82. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
  83. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
  84. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
  85. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
  86. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
  87. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
  88. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
  89. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
  90. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
  91. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
  92. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
  93. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
  94. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
  95. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
  96. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
  97. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
  98. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
  99. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  100. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  101. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  102. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
  103. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
  104. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
  105. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
  106. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/bool_q/__init__.py +0 -0
  107. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
  108. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/bool_q/template.py +0 -0
  109. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/drop/__init__.py +0 -0
  110. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/drop/drop.py +0 -0
  111. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/drop/task.py +0 -0
  112. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/drop/template.py +0 -0
  113. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
  114. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
  115. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
  116. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
  117. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
  118. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
  119. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/gsm8k/template.py +0 -0
  120. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
  121. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
  122. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/hellaswag/task.py +0 -0
  123. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/hellaswag/template.py +0 -0
  124. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/human_eval/__init__.py +0 -0
  125. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
  126. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/human_eval/task.py +0 -0
  127. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/human_eval/template.py +0 -0
  128. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/ifeval/__init__.py +0 -0
  129. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
  130. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/ifeval/template.py +0 -0
  131. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/lambada/__init__.py +0 -0
  132. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/lambada/lambada.py +0 -0
  133. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/lambada/template.py +0 -0
  134. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
  135. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
  136. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/logi_qa/task.py +0 -0
  137. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/logi_qa/template.py +0 -0
  138. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/math_qa/__init__.py +0 -0
  139. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
  140. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/math_qa/task.py +0 -0
  141. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/math_qa/template.py +0 -0
  142. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/mmlu/__init__.py +0 -0
  143. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
  144. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/mmlu/task.py +0 -0
  145. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/mmlu/template.py +0 -0
  146. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/modes/__init__.py +0 -0
  147. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/results.py +0 -0
  148. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/schema.py +0 -0
  149. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/squad/__init__.py +0 -0
  150. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/squad/squad.py +0 -0
  151. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/squad/task.py +0 -0
  152. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/squad/template.py +0 -0
  153. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/tasks/__init__.py +0 -0
  154. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
  155. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
  156. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/truthful_qa/task.py +0 -0
  157. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/truthful_qa/template.py +0 -0
  158. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
  159. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/utils.py +0 -0
  160. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/winogrande/__init__.py +0 -0
  161. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/winogrande/template.py +0 -0
  162. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
  163. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/cli/__init__.py +0 -0
  164. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/cli/dotenv_handler.py +0 -0
  165. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/cli/main.py +0 -0
  166. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/cli/server.py +0 -0
  167. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/cli/test.py +0 -0
  168. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/cli/types.py +0 -0
  169. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/cli/utils.py +0 -0
  170. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/confident/__init__.py +0 -0
  171. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/confident/types.py +0 -0
  172. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/config/__init__.py +0 -0
  173. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/config/dotenv_handler.py +0 -0
  174. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/config/logging.py +0 -0
  175. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/config/settings.py +0 -0
  176. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/config/settings_manager.py +0 -0
  177. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/config/utils.py +0 -0
  178. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/constants.py +0 -0
  179. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/contextvars.py +0 -0
  180. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/dataset/__init__.py +0 -0
  181. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/dataset/api.py +0 -0
  182. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/dataset/dataset.py +0 -0
  183. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/dataset/golden.py +0 -0
  184. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/dataset/test_run_tracer.py +0 -0
  185. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/dataset/types.py +0 -0
  186. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/dataset/utils.py +0 -0
  187. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/errors.py +0 -0
  188. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/evaluate/__init__.py +0 -0
  189. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/evaluate/api.py +0 -0
  190. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/evaluate/compare.py +0 -0
  191. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/evaluate/configs.py +0 -0
  192. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/evaluate/evaluate.py +0 -0
  193. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/evaluate/execute.py +0 -0
  194. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/evaluate/types.py +0 -0
  195. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/evaluate/utils.py +0 -0
  196. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/__init__.py +0 -0
  197. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/hugging_face/__init__.py +0 -0
  198. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/hugging_face/callback.py +0 -0
  199. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
  200. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
  201. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/hugging_face/utils.py +0 -0
  202. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/langchain/__init__.py +0 -0
  203. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/langchain/patch.py +0 -0
  204. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/llama_index/__init__.py +0 -0
  205. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/llama_index/utils.py +0 -0
  206. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
  207. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/pydantic_ai/agent.py +0 -0
  208. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
  209. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/key_handler.py +0 -0
  210. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/__init__.py +0 -0
  211. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
  212. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
  213. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/answer_relevancy/schema.py +0 -0
  214. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/answer_relevancy/template.py +0 -0
  215. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/api.py +0 -0
  216. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
  217. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
  218. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/arena_g_eval/schema.py +0 -0
  219. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/arena_g_eval/template.py +0 -0
  220. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/arena_g_eval/utils.py +0 -0
  221. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/argument_correctness/__init__.py +0 -0
  222. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
  223. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/argument_correctness/schema.py +0 -0
  224. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/argument_correctness/template.py +0 -0
  225. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/base_metric.py +0 -0
  226. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/bias/__init__.py +0 -0
  227. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/bias/bias.py +0 -0
  228. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/bias/schema.py +0 -0
  229. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/bias/template.py +0 -0
  230. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_precision/__init__.py +0 -0
  231. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
  232. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_precision/schema.py +0 -0
  233. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_precision/template.py +0 -0
  234. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_recall/__init__.py +0 -0
  235. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
  236. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_recall/schema.py +0 -0
  237. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_recall/template.py +0 -0
  238. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
  239. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
  240. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
  241. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/contextual_relevancy/template.py +0 -0
  242. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
  243. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
  244. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversation_completeness/schema.py +0 -0
  245. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversation_completeness/template.py +0 -0
  246. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversational_dag/__init__.py +0 -0
  247. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
  248. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversational_dag/nodes.py +0 -0
  249. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversational_dag/templates.py +0 -0
  250. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
  251. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
  252. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
  253. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/conversational_g_eval/template.py +0 -0
  254. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/dag/__init__.py +0 -0
  255. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/dag/dag.py +0 -0
  256. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/dag/graph.py +0 -0
  257. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/dag/nodes.py +0 -0
  258. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/dag/schema.py +0 -0
  259. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/dag/templates.py +0 -0
  260. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/dag/utils.py +0 -0
  261. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/exact_match/__init__.py +0 -0
  262. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/exact_match/exact_match.py +0 -0
  263. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/faithfulness/__init__.py +0 -0
  264. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
  265. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/faithfulness/schema.py +0 -0
  266. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/faithfulness/template.py +0 -0
  267. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/g_eval/__init__.py +0 -0
  268. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/g_eval/g_eval.py +0 -0
  269. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/g_eval/schema.py +0 -0
  270. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/g_eval/template.py +0 -0
  271. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/g_eval/utils.py +0 -0
  272. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
  273. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/goal_accuracy/goal_accuracy.py +0 -0
  274. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/goal_accuracy/schema.py +0 -0
  275. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/goal_accuracy/template.py +0 -0
  276. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/hallucination/__init__.py +0 -0
  277. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/hallucination/hallucination.py +0 -0
  278. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/hallucination/schema.py +0 -0
  279. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/hallucination/template.py +0 -0
  280. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/indicator.py +0 -0
  281. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/json_correctness/__init__.py +0 -0
  282. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
  283. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/json_correctness/schema.py +0 -0
  284. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/json_correctness/template.py +0 -0
  285. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
  286. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
  287. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/knowledge_retention/schema.py +0 -0
  288. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/knowledge_retention/template.py +0 -0
  289. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/mcp/__init__.py +0 -0
  290. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
  291. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
  292. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/mcp/schema.py +0 -0
  293. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/mcp/template.py +0 -0
  294. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
  295. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
  296. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
  297. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/mcp_use_metric/template.py +0 -0
  298. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/misuse/__init__.py +0 -0
  299. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/misuse/misuse.py +0 -0
  300. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/misuse/schema.py +0 -0
  301. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/misuse/template.py +0 -0
  302. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
  303. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
  304. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
  305. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
  306. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
  307. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
  308. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
  309. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
  310. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
  311. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
  312. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
  313. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
  314. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
  315. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
  316. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
  317. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
  318. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
  319. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
  320. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
  321. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
  322. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
  323. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/non_advice/__init__.py +0 -0
  324. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/non_advice/non_advice.py +0 -0
  325. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/non_advice/schema.py +0 -0
  326. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/non_advice/template.py +0 -0
  327. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/pattern_match/__init__.py +0 -0
  328. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/pattern_match/pattern_match.py +0 -0
  329. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/pii_leakage/__init__.py +0 -0
  330. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
  331. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/pii_leakage/schema.py +0 -0
  332. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/pii_leakage/template.py +0 -0
  333. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/plan_adherence/__init__.py +0 -0
  334. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/plan_adherence/plan_adherence.py +0 -0
  335. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/plan_adherence/schema.py +0 -0
  336. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/plan_adherence/template.py +0 -0
  337. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/plan_quality/__init__.py +0 -0
  338. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/plan_quality/plan_quality.py +0 -0
  339. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/plan_quality/schema.py +0 -0
  340. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/plan_quality/template.py +0 -0
  341. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
  342. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
  343. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/prompt_alignment/schema.py +0 -0
  344. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/prompt_alignment/template.py +0 -0
  345. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/ragas.py +0 -0
  346. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/role_adherence/__init__.py +0 -0
  347. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
  348. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/role_adherence/schema.py +0 -0
  349. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/role_adherence/template.py +0 -0
  350. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/role_violation/__init__.py +0 -0
  351. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/role_violation/role_violation.py +0 -0
  352. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/role_violation/schema.py +0 -0
  353. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/role_violation/template.py +0 -0
  354. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/step_efficiency/__init__.py +0 -0
  355. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/step_efficiency/schema.py +0 -0
  356. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/step_efficiency/step_efficiency.py +0 -0
  357. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/step_efficiency/template.py +0 -0
  358. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/summarization/__init__.py +0 -0
  359. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/summarization/schema.py +0 -0
  360. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/summarization/summarization.py +0 -0
  361. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/summarization/template.py +0 -0
  362. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/task_completion/__init__.py +0 -0
  363. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/task_completion/schema.py +0 -0
  364. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/task_completion/task_completion.py +0 -0
  365. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/task_completion/template.py +0 -0
  366. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/tool_correctness/__init__.py +0 -0
  367. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/tool_correctness/schema.py +0 -0
  368. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/tool_correctness/template.py +0 -0
  369. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
  370. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/tool_use/__init__.py +0 -0
  371. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/tool_use/schema.py +0 -0
  372. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/tool_use/template.py +0 -0
  373. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/tool_use/tool_use.py +0 -0
  374. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/topic_adherence/__init__.py +0 -0
  375. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/topic_adherence/schema.py +0 -0
  376. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/topic_adherence/template.py +0 -0
  377. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/topic_adherence/topic_adherence.py +0 -0
  378. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/toxicity/__init__.py +0 -0
  379. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/toxicity/schema.py +0 -0
  380. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/toxicity/template.py +0 -0
  381. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/toxicity/toxicity.py +0 -0
  382. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_precision/__init__.py +0 -0
  383. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_precision/schema.py +0 -0
  384. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_precision/template.py +0 -0
  385. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +0 -0
  386. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_recall/__init__.py +0 -0
  387. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_recall/schema.py +0 -0
  388. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_recall/template.py +0 -0
  389. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +0 -0
  390. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_relevancy/__init__.py +0 -0
  391. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_relevancy/schema.py +0 -0
  392. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_relevancy/template.py +0 -0
  393. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +0 -0
  394. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_faithfulness/__init__.py +0 -0
  395. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_faithfulness/schema.py +0 -0
  396. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_faithfulness/template.py +0 -0
  397. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_faithfulness/turn_faithfulness.py +0 -0
  398. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
  399. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_relevancy/schema.py +0 -0
  400. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_relevancy/template.py +0 -0
  401. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
  402. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/model_integrations/__init__.py +0 -0
  403. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/model_integrations/types.py +0 -0
  404. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/model_integrations/utils.py +0 -0
  405. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/__init__.py +0 -0
  406. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/_summac_model.py +0 -0
  407. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/answer_relevancy_model.py +0 -0
  408. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/base_model.py +0 -0
  409. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/detoxify_model.py +0 -0
  410. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/embedding_models/__init__.py +0 -0
  411. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
  412. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
  413. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
  414. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
  415. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/hallucination_model.py +0 -0
  416. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/__init__.py +0 -0
  417. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
  418. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/anthropic_model.py +0 -0
  419. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/azure_model.py +0 -0
  420. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/constants.py +0 -0
  421. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/deepseek_model.py +0 -0
  422. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/gemini_model.py +0 -0
  423. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/grok_model.py +0 -0
  424. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/kimi_model.py +0 -0
  425. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/litellm_model.py +0 -0
  426. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/local_model.py +0 -0
  427. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/ollama_model.py +0 -0
  428. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/openai_model.py +0 -0
  429. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/openrouter_model.py +0 -0
  430. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/portkey_model.py +0 -0
  431. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/llms/utils.py +0 -0
  432. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/retry_policy.py +0 -0
  433. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/summac_model.py +0 -0
  434. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/unbias_model.py +0 -0
  435. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/models/utils.py +0 -0
  436. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/openai/__init__.py +0 -0
  437. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/openai/extractors.py +0 -0
  438. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/openai/patch.py +0 -0
  439. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/openai/utils.py +0 -0
  440. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/openai_agents/__init__.py +0 -0
  441. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/openai_agents/agent.py +0 -0
  442. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/openai_agents/callback_handler.py +0 -0
  443. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/openai_agents/extractors.py +0 -0
  444. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/openai_agents/patch.py +0 -0
  445. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/openai_agents/runner.py +0 -0
  446. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/__init__.py +0 -0
  447. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/__init__.py +0 -0
  448. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/base.py +0 -0
  449. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/configs.py +0 -0
  450. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/copro/__init__.py +0 -0
  451. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/copro/copro.py +0 -0
  452. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/gepa/__init__.py +0 -0
  453. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/gepa/gepa.py +0 -0
  454. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/miprov2/__init__.py +0 -0
  455. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/miprov2/bootstrapper.py +0 -0
  456. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/miprov2/miprov2.py +0 -0
  457. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/miprov2/proposer.py +0 -0
  458. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/simba/__init__.py +0 -0
  459. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/simba/simba.py +0 -0
  460. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/algorithms/simba/types.py +0 -0
  461. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/configs.py +0 -0
  462. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/policies.py +0 -0
  463. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/prompt_optimizer.py +0 -0
  464. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/rewriter/__init__.py +0 -0
  465. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/rewriter/rewriter.py +0 -0
  466. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/rewriter/utils.py +0 -0
  467. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/scorer/__init__.py +0 -0
  468. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/scorer/base.py +0 -0
  469. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/scorer/scorer.py +0 -0
  470. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/scorer/utils.py +0 -0
  471. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/types.py +0 -0
  472. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/optimizer/utils.py +0 -0
  473. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/plugins/__init__.py +0 -0
  474. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/plugins/plugin.py +0 -0
  475. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/progress_context.py +0 -0
  476. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/prompt/__init__.py +0 -0
  477. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/prompt/api.py +0 -0
  478. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/prompt/prompt.py +0 -0
  479. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/prompt/utils.py +0 -0
  480. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/py.typed +0 -0
  481. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/red_teaming/README.md +0 -0
  482. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/scorer/__init__.py +0 -0
  483. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/scorer/scorer.py +0 -0
  484. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/simulator/__init__.py +0 -0
  485. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/simulator/schema.py +0 -0
  486. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/simulator/template.py +0 -0
  487. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/singleton.py +0 -0
  488. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/__init__.py +0 -0
  489. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/base_synthesizer.py +0 -0
  490. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/chunking/__init__.py +0 -0
  491. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/chunking/context_generator.py +0 -0
  492. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
  493. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/config.py +0 -0
  494. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/schema.py +0 -0
  495. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/synthesizer.py +0 -0
  496. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/templates/__init__.py +0 -0
  497. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/templates/template.py +0 -0
  498. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/templates/template_extraction.py +0 -0
  499. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/templates/template_prompt.py +0 -0
  500. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/types.py +0 -0
  501. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/synthesizer/utils.py +0 -0
  502. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_case/__init__.py +0 -0
  503. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_case/api.py +0 -0
  504. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_case/arena_test_case.py +0 -0
  505. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_case/conversational_test_case.py +0 -0
  506. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_case/llm_test_case.py +0 -0
  507. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_case/mcp.py +0 -0
  508. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_case/utils.py +0 -0
  509. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_run/__init__.py +0 -0
  510. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_run/api.py +0 -0
  511. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_run/cache.py +0 -0
  512. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_run/hooks.py +0 -0
  513. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_run/hyperparameters.py +0 -0
  514. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/test_run/test_run.py +0 -0
  515. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/__init__.py +0 -0
  516. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/offline_evals/__init__.py +0 -0
  517. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/offline_evals/api.py +0 -0
  518. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/offline_evals/span.py +0 -0
  519. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/offline_evals/thread.py +0 -0
  520. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/offline_evals/trace.py +0 -0
  521. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/otel/__init__.py +0 -0
  522. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/otel/exporter.py +0 -0
  523. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/otel/test_exporter.py +0 -0
  524. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/otel/utils.py +0 -0
  525. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/patchers.py +0 -0
  526. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/perf_epoch_bridge.py +0 -0
  527. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/trace_test_manager.py +0 -0
  528. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/tracing/utils.py +0 -0
  529. {deepeval-3.8.1 → deepeval-3.8.3}/deepeval/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.8.1
3
+ Version: 3.8.3
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -0,0 +1 @@
1
+ __version__: str = "3.8.3"
@@ -26,16 +26,44 @@ API_BASE_URL_EU = "https://eu.api.confident-ai.com"
26
26
  retryable_exceptions = requests.exceptions.SSLError
27
27
 
28
28
 
29
+ def _infer_region_from_api_key(api_key: Optional[str]) -> Optional[str]:
30
+ """
31
+ Infer region from Confident API key prefix.
32
+
33
+ Supported:
34
+ - confident_eu_... => "EU"
35
+ - confident_us_... => "US"
36
+
37
+ Returns None if prefix is not recognized or api_key is falsy.
38
+ """
39
+ if not api_key:
40
+ return None
41
+ key = api_key.strip().lower()
42
+ if key.startswith("confident_eu_"):
43
+ return "EU"
44
+ if key.startswith("confident_us_"):
45
+ return "US"
46
+ return None
47
+
48
+
29
49
  def get_base_api_url():
30
50
  s = get_settings()
31
51
  if s.CONFIDENT_BASE_URL:
32
52
  base_url = s.CONFIDENT_BASE_URL.rstrip("/")
33
53
  return base_url
54
+ # If the user has explicitly set a region, respect it.
34
55
  region = KEY_FILE_HANDLER.fetch_data(KeyValues.CONFIDENT_REGION)
35
- if region == "EU":
56
+ if region:
57
+ return API_BASE_URL_EU if region == "EU" else API_BASE_URL
58
+
59
+ # Otherwise, infer region from the API key prefix.
60
+ api_key = get_confident_api_key()
61
+ inferred = _infer_region_from_api_key(api_key)
62
+ if inferred == "EU":
36
63
  return API_BASE_URL_EU
37
- else:
38
- return API_BASE_URL
64
+
65
+ # Default to US (backwards compatible)
66
+ return API_BASE_URL
39
67
 
40
68
 
41
69
  def get_confident_api_key() -> Optional[str]:
@@ -0,0 +1,16 @@
1
+ from .handler import instrument_crewai, reset_crewai_instrumentation
2
+ from .subs import (
3
+ DeepEvalCrew as Crew,
4
+ DeepEvalAgent as Agent,
5
+ DeepEvalLLM as LLM,
6
+ )
7
+ from .tool import tool
8
+
9
+ __all__ = [
10
+ "instrument_crewai",
11
+ "Crew",
12
+ "Agent",
13
+ "LLM",
14
+ "tool",
15
+ "reset_crewai_instrumentation",
16
+ ]
@@ -0,0 +1,427 @@
1
+ import logging
2
+ import deepeval
3
+ from collections import defaultdict
4
+ from time import perf_counter
5
+ from typing import Optional, Tuple, Any, List, Union
6
+ from deepeval.telemetry import capture_tracing_integration
7
+ from deepeval.tracing.context import current_span_context, current_trace_context
8
+ from deepeval.tracing.tracing import Observer, trace_manager
9
+ from deepeval.tracing.types import ToolSpan, SpanType, TraceSpanStatus
10
+ from deepeval.config.settings import get_settings
11
+
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
# Optional dependency guard: crewai is only needed when the integration is
# actually used, so a failed import is recorded in ``crewai_installed``
# instead of being raised here.
try:
    from crewai.events import BaseEventListener
    from crewai.events import (
        CrewKickoffStartedEvent,
        CrewKickoffCompletedEvent,
        LLMCallStartedEvent,
        LLMCallCompletedEvent,
        AgentExecutionStartedEvent,
        AgentExecutionCompletedEvent,
        ToolUsageStartedEvent,
        ToolUsageFinishedEvent,
        KnowledgeRetrievalStartedEvent,
        KnowledgeRetrievalCompletedEvent,
    )

    crewai_installed = True
except ImportError as e:
    if get_settings().DEEPEVAL_VERBOSE_MODE:
        if isinstance(e, ModuleNotFoundError):
            logger.warning(
                "Optional crewai dependency not installed: %s",
                e.name,
                stacklevel=2,
            )
        else:
            logger.warning(
                "Optional crewai import failed: %s",
                e,
                stacklevel=2,
            )

    # The listener class below names BaseEventListener as its base at class
    # creation time. Without a placeholder, importing this module would die
    # with a NameError before is_crewai_installed() could raise its
    # actionable ImportError.
    BaseEventListener = object
    crewai_installed = False
48
+
49
+ # GLOBAL STATE to prevent duplicate listeners
50
+ IS_WRAPPED_ALL = False
51
+ _listener_instance = None
52
+
53
+
54
def is_crewai_installed():
    """Raise ImportError unless the module-level crewai import succeeded.

    Despite the predicate-style name, this returns None on success rather
    than a boolean; callers use it as a guard.
    """
    if crewai_installed:
        return
    raise ImportError(
        "CrewAI is not installed. Please install it with `pip install crewai`."
    )
59
+
60
+
61
+ def _get_metrics_data(obj: Any) -> Tuple[Optional[str], Optional[Any]]:
62
+ """Helper to safely extract metrics attached to CrewAI objects."""
63
+
64
+ if not obj:
65
+ return None, None
66
+ metric_collection = getattr(obj, "_metric_collection", None)
67
+ metrics = getattr(obj, "_metrics", None)
68
+
69
+ if metric_collection is not None or metrics is not None:
70
+ return metric_collection, metrics
71
+
72
+ func = getattr(obj, "func", None)
73
+ if func:
74
+ metric_collection = getattr(func, "_metric_collection", None)
75
+ metrics = getattr(func, "_metrics", None)
76
+
77
+ return metric_collection, metrics
78
+
79
+
80
class CrewAIEventsListener(BaseEventListener):
    """Event-bus listener that mirrors CrewAI events onto deepeval spans.

    ``setup_listeners`` registers one handler per CrewAI event type:

    - crew and agent events only annotate whatever span/trace is current;
    - LLM, tool and knowledge-retrieval events enter an ``Observer`` on the
      "started" event and exit it on the matching "completed"/"finished"
      event.
    """

    def __init__(self):
        # Fail with the actionable ImportError when crewai is missing.
        is_crewai_installed()
        super().__init__()
        # Entered LLM / knowledge-retrieval observers, keyed by execution id.
        self.span_observers: dict[str, Observer] = {}
        # One stack per tool key. A ``None`` entry marks a "started" event
        # that deliberately did not open its own span (see on_tool_started),
        # so the matching "finished" event knows to do nothing.
        self.tool_observers_stack: dict[str, List[Union[Observer, None]]] = (
            defaultdict(list)
        )

    def reset_state(self):
        """Clears all internal state to prevent pollution between tests."""
        self.span_observers.clear()
        self.tool_observers_stack.clear()

    @staticmethod
    def get_tool_stack_key(source, tool_name) -> str:
        """
        Build the key under which a tool's observer stack is stored.

        The source is identified by its ``role``, else its ``name``, else
        ``id(source)``. Role/name are preferred over id() to be robust
        against object copying by CrewAI.
        """
        identifier = getattr(
            source, "role", getattr(source, "name", str(id(source)))
        )
        return f"{tool_name}_{identifier}"

    @staticmethod
    def get_knowledge_execution_id(source, event) -> str:
        """Build the ``span_observers`` key for a knowledge retrieval.

        Combines ``id(source)`` with ``id(event.agent)``, or the literal
        "unknown" when the event has no ``agent`` attribute.
        """
        source_id = id(source)
        agent_id = id(event.agent) if hasattr(event, "agent") else "unknown"
        execution_id = f"_knowledge_{source_id}_{agent_id}"

        return execution_id

    @staticmethod
    def get_llm_execution_id(source, event) -> str:
        """Build the ``span_observers`` key for an LLM call.

        Only ``id(source)`` is used; ``event`` is accepted for signature
        symmetry with ``get_knowledge_execution_id``.
        """
        source_id = id(source)
        return f"llm_{source_id}"

    def _flatten_tool_span(self, span):
        """
        Callback to move any child ToolSpans up to the parent.

        Does nothing when ``span`` has no parent, has no children, or its
        parent cannot be found in ``trace_manager``.
        """
        if not span.parent_uuid or not span.children:
            return

        parent_span = trace_manager.get_span_by_uuid(span.parent_uuid)
        if not parent_span:
            return

        # Identify child tool spans (ghost nesting)
        tools_to_move = [
            child for child in span.children if isinstance(child, ToolSpan)
        ]
        if not tools_to_move:
            return

        if parent_span.children is None:
            parent_span.children = []

        for child in tools_to_move:
            child.parent_uuid = parent_span.uuid
            parent_span.children.append(child)

        # Keep only the non-tool children on the original span.
        span.children = [
            child for child in span.children if not isinstance(child, ToolSpan)
        ]

    def setup_listeners(self, crewai_event_bus):
        """Register all deepeval handlers on ``crewai_event_bus``."""

        @crewai_event_bus.on(CrewKickoffStartedEvent)
        def on_crew_started(source, event: CrewKickoffStartedEvent):
            # Record the kickoff inputs on the current span and trace, if any.
            current_span = current_span_context.get()
            if current_span:
                current_span.input = event.inputs
            current_trace = current_trace_context.get()
            if current_trace:
                current_trace.input = event.inputs

        @crewai_event_bus.on(CrewKickoffCompletedEvent)
        def on_crew_completed(source, event: CrewKickoffCompletedEvent):
            current_span = current_span_context.get()
            # Prefer ``output``, then ``result``, then the event's own str().
            output = getattr(
                event, "output", getattr(event, "result", str(event))
            )
            if current_span:
                current_span.output = str(output)
            current_trace = current_trace_context.get()
            if current_trace:
                current_trace.output = str(output)

        @crewai_event_bus.on(LLMCallStartedEvent)
        def on_llm_started(source, event: LLMCallStartedEvent):
            metric_collection, metrics = _get_metrics_data(source)
            observer = Observer(
                span_type="llm",
                func_name="call",
                observe_kwargs={"model": getattr(event, "model", "unknown")},
                metric_collection=metric_collection,
                metrics=metrics,
            )
            self.span_observers[self.get_llm_execution_id(source, event)] = (
                observer
            )
            observer.__enter__()

            if observer.trace_uuid:
                span = trace_manager.get_span_by_uuid(observer.uuid)
                if span:
                    # Default to None: the observer is already entered, so
                    # raising here on an event without ``messages`` would
                    # abort the handler for a purely informational field.
                    span.input = getattr(event, "messages", None)

        @crewai_event_bus.on(LLMCallCompletedEvent)
        def on_llm_completed(source, event: LLMCallCompletedEvent):
            key = self.get_llm_execution_id(source, event)
            observer = self.span_observers.pop(key, None)
            if not observer:
                return

            current_span = current_span_context.get()
            token = None
            span_to_close = trace_manager.get_span_by_uuid(observer.uuid)

            if span_to_close:
                span_to_close.output = getattr(
                    event, "response", getattr(event, "output", "")
                )
                # Temporarily make the LLM span current while the observer
                # exits, if some other span (or none) is current right now.
                if not current_span or current_span.uuid != observer.uuid:
                    token = current_span_context.set(span_to_close)

            observer.__exit__(None, None, None)
            if token:
                current_span_context.reset(token)

        @crewai_event_bus.on(AgentExecutionStartedEvent)
        def on_agent_started(source, event: AgentExecutionStartedEvent):
            current_span = current_span_context.get()
            if current_span:
                current_span.input = event.task_prompt

        @crewai_event_bus.on(AgentExecutionCompletedEvent)
        def on_agent_completed(source, event: AgentExecutionCompletedEvent):
            current_span = current_span_context.get()
            if current_span:
                current_span.output = getattr(
                    event, "output", getattr(event, "result", "")
                )

        @crewai_event_bus.on(ToolUsageStartedEvent)
        def on_tool_started(source, event: ToolUsageStartedEvent):
            key = self.get_tool_stack_key(source, event.tool_name)
            stack = self.tool_observers_stack[key]

            # 1. Internal Stack Check: an entry is already pending for this
            # key, so push a placeholder instead of opening a second span.
            if stack:
                stack.append(None)
                return

            # 2. SMART DEDUPING: the current span is already a tool span
            # with this tool's name, so do not open a duplicate.
            current_span = current_span_context.get()
            span_type = getattr(current_span, "type", None)
            is_tool_span = span_type == "tool" or span_type == SpanType.TOOL
            if (
                is_tool_span
                and getattr(current_span, "name", "") == event.tool_name
            ):
                stack.append(None)
                return

            metric_collection = None
            metrics = None

            # ``tools`` may be absent or None on the source; treat both as
            # "no tools" rather than raising on iteration.
            for candidate in getattr(source, "tools", None) or []:
                if getattr(candidate, "name", None) == event.tool_name:
                    metric_collection, metrics = _get_metrics_data(candidate)
                    break

            # Fall back to the agent only when the tool supplied neither a
            # collection nor metrics, so tool-level metrics are not
            # overwritten by the agent's.
            if not metric_collection and not metrics:
                agent = getattr(source, "agent", source)
                metric_collection, metrics = _get_metrics_data(agent)

            observer = Observer(
                span_type="tool",
                func_name=event.tool_name,
                function_kwargs=event.tool_args,
                metric_collection=metric_collection,
                metrics=metrics,
            )

            stack.append(observer)
            observer.__enter__()

        @crewai_event_bus.on(ToolUsageFinishedEvent)
        def on_tool_completed(source, event: ToolUsageFinishedEvent):
            key = self.get_tool_stack_key(source, event.tool_name)
            observer = None

            # ``.get`` avoids creating an empty stack for an unknown key.
            stack = self.tool_observers_stack.get(key)
            if stack:
                item = stack.pop()
                if item is None:
                    # Placeholder pushed by on_tool_started: nothing to close.
                    return
                observer = item

            if not observer:
                # No observer was recorded for this tool. If the current span
                # is a tool span with the same name, close it by hand.
                current_span = current_span_context.get()
                if (
                    current_span
                    and getattr(current_span, "type", None)
                    in ["tool", SpanType.TOOL]
                    and getattr(current_span, "name", "") == event.tool_name
                ):
                    current_span.output = getattr(
                        event, "output", getattr(event, "result", None)
                    )

                    if current_span.end_time is None:
                        current_span.end_time = perf_counter()

                    current_span.status = TraceSpanStatus.SUCCESS

                    self._flatten_tool_span(current_span)
                    trace_manager.remove_span(current_span.uuid)

                    # Make the parent span (if it can be found) current again.
                    if current_span.parent_uuid:
                        parent = trace_manager.get_span_by_uuid(
                            current_span.parent_uuid
                        )
                        current_span_context.set(parent if parent else None)
                    else:
                        current_span_context.set(None)
                return

            current_span = current_span_context.get()
            token = None
            span_to_close = trace_manager.get_span_by_uuid(observer.uuid)

            if span_to_close:
                span_to_close.output = getattr(
                    event, "output", getattr(event, "result", None)
                )
                if not current_span or current_span.uuid != observer.uuid:
                    token = current_span_context.set(span_to_close)

            # Hoist nested tool spans to the parent as part of the exit.
            observer.update_span_properties = self._flatten_tool_span
            observer.__exit__(None, None, None)

            if span_to_close and span_to_close.end_time is None:
                span_to_close.end_time = perf_counter()
                span_to_close.status = TraceSpanStatus.SUCCESS

            if token:
                current_span_context.reset(token)

        @crewai_event_bus.on(KnowledgeRetrievalStartedEvent)
        def on_knowledge_started(source, event: KnowledgeRetrievalStartedEvent):
            # Knowledge retrieval is recorded as a tool-type span; its input
            # and output are filled in by on_knowledge_completed.
            observer = Observer(
                span_type="tool",
                func_name="knowledge_retrieval",
                function_kwargs={},
            )
            self.span_observers[
                self.get_knowledge_execution_id(source, event)
            ] = observer
            observer.__enter__()

        @crewai_event_bus.on(KnowledgeRetrievalCompletedEvent)
        def on_knowledge_completed(
            source, event: KnowledgeRetrievalCompletedEvent
        ):
            key = self.get_knowledge_execution_id(source, event)
            observer = self.span_observers.pop(key, None)
            if not observer:
                return

            current_span = current_span_context.get()
            token = None
            span_to_close = trace_manager.get_span_by_uuid(observer.uuid)

            if span_to_close:
                span_to_close.input = event.query
                span_to_close.output = event.retrieved_knowledge

                if not current_span or current_span.uuid != observer.uuid:
                    token = current_span_context.set(span_to_close)

            observer.__exit__(None, None, None)

            if token:
                current_span_context.reset(token)
381
+
382
+
383
def instrument_crewai(api_key: Optional[str] = None):
    """Enable deepeval tracing for CrewAI.

    Raises ImportError (via ``is_crewai_installed``) when crewai is missing.
    Otherwise, inside the ``capture_tracing_integration("crewai")`` context,
    this logs in with ``api_key`` when one is given, applies the CrewAI
    wrappers, and creates the module-wide listener if none exists yet.
    """
    global _listener_instance

    is_crewai_installed()

    with capture_tracing_integration("crewai"):
        if api_key:
            deepeval.login(api_key)

        wrap_all()

        # Reuse the existing listener on repeated calls.
        if _listener_instance is None:
            _listener_instance = CrewAIEventsListener()
395
+
396
+
397
def reset_crewai_instrumentation():
    """Clear the state held by the module-wide listener, if one exists.

    The listener itself is kept; only its ``reset_state()`` is invoked.
    """
    listener = _listener_instance
    if listener:
        listener.reset_state()
401
+
402
+
403
def wrap_all():
    """Apply every CrewAI wrapper once per process.

    The ``IS_WRAPPED_ALL`` flag is set only after all wrappers have run, so
    later calls return immediately.
    """
    global IS_WRAPPED_ALL

    if IS_WRAPPED_ALL:
        return

    # Imported here rather than at module import time.
    from deepeval.integrations.crewai.wrapper import (
        wrap_crew_kickoff,
        wrap_crew_kickoff_for_each,
        wrap_crew_kickoff_async,
        wrap_crew_kickoff_for_each_async,
        wrap_crew_akickoff,
        wrap_crew_akickoff_for_each,
        wrap_agent_execute_task,
        wrap_agent_aexecute_task,
    )

    patchers = (
        wrap_crew_kickoff,
        wrap_crew_kickoff_for_each,
        wrap_crew_kickoff_async,
        wrap_crew_kickoff_for_each_async,
        wrap_crew_akickoff,
        wrap_crew_akickoff_for_each,
        wrap_agent_execute_task,
        wrap_agent_aexecute_task,
    )
    for apply_patch in patchers:
        apply_patch()

    IS_WRAPPED_ALL = True
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Type, TypeVar
1
+ from typing import List, Optional, Type, TypeVar, Callable
2
2
  from pydantic import PrivateAttr
3
3
 
4
4
  from deepeval.metrics.base_metric import BaseMetric
@@ -28,14 +28,10 @@ def create_deepeval_class(base_class: Type[T], class_name: str) -> Type[T]:
28
28
  _metric_collection: Optional[str] = PrivateAttr(default=None)
29
29
  _metrics: Optional[List[BaseMetric]] = PrivateAttr(default=None)
30
30
 
31
- def __init__(
32
- self,
33
- *args,
34
- metrics: Optional[List[BaseMetric]] = None,
35
- metric_collection: Optional[str] = None,
36
- **kwargs
37
- ):
31
+ def __init__(self, *args, **kwargs):
38
32
  is_crewai_installed()
33
+ metric_collection = kwargs.pop("metric_collection", None)
34
+ metrics = kwargs.pop("metrics", None)
39
35
  super().__init__(*args, **kwargs)
40
36
  self._metric_collection = metric_collection
41
37
  self._metrics = metrics
@@ -45,7 +41,24 @@ def create_deepeval_class(base_class: Type[T], class_name: str) -> Type[T]:
45
41
  return DeepEvalClass
46
42
 
47
43
 
48
- # Create the classes
44
def create_deepeval_llm(base_factory: Callable) -> Callable:
    """Wrap an LLM factory (function or class) so it accepts DeepEval kwargs.

    The returned callable removes ``metric_collection`` and ``metrics`` from
    the keyword arguments, builds the instance with ``base_factory`` and then
    stores the two values on it as ``_metric_collection`` and ``_metrics``.

    Attaching the values is best-effort and never raises. Each attribute is
    handled independently: a plain assignment is tried first, and if the
    instance rejects it (for example through a restrictive ``__setattr__``)
    ``object.__setattr__`` is used instead. If both fail, a warning is logged
    rather than silently dropping the caller's metrics.

    Args:
        base_factory: Callable that builds the underlying LLM instance.

    Returns:
        A callable with the same positional/keyword interface as
        ``base_factory`` plus the optional ``metric_collection`` and
        ``metrics`` keyword arguments.
    """
    import logging

    logger = logging.getLogger(__name__)

    def factory_wrapper(*args, **kwargs):
        is_crewai_installed()
        # Strip DeepEval-only options so base_factory never sees them.
        metric_collection = kwargs.pop("metric_collection", None)
        metrics = kwargs.pop("metrics", None)
        instance = base_factory(*args, **kwargs)

        for attr_name, attr_value in (
            ("_metric_collection", metric_collection),
            ("_metrics", metrics),
        ):
            try:
                setattr(instance, attr_name, attr_value)
                continue
            except Exception:
                # Fall through to the lower-level assignment below.
                pass
            try:
                # Bypass a custom __setattr__ that refused the assignment.
                object.__setattr__(instance, attr_name, attr_value)
            except Exception:
                logger.warning(
                    "Could not attach %s to %s instance; "
                    "the value will be ignored.",
                    attr_name,
                    type(instance).__name__,
                )
        return instance

    return factory_wrapper
60
+
61
+
49
62
  DeepEvalCrew = create_deepeval_class(Crew, "DeepEvalCrew")
50
63
  DeepEvalAgent = create_deepeval_class(Agent, "DeepEvalAgent")
51
- DeepEvalLLM = create_deepeval_class(LLM, "DeepEvalLLM")
64
+ DeepEvalLLM = create_deepeval_llm(LLM)
@@ -15,6 +15,20 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
15
15
  """
16
16
  crewai_kwargs = kwargs
17
17
 
18
def _attach_metadata(tool_instance):
    """Best-effort: record the decorator's metric settings on the tool.

    First tries to store ``metric_collection`` and ``metrics`` through
    ``object.__setattr__``. If that raises, falls back to ordinary attribute
    assignment of ``_metric_collection`` and ``_metrics``. Any failure of the
    fallback is ignored. The tool is returned in every case.
    """
    try:
        for attr_name, attr_value in (
            ("metric_collection", metric_collection),
            ("metrics", metric),
        ):
            object.__setattr__(tool_instance, attr_name, attr_value)
    except Exception:
        try:
            setattr(tool_instance, "_metric_collection", metric_collection)
            setattr(tool_instance, "_metrics", metric)
        except Exception:
            # Deliberately ignored: attaching metadata must never break
            # tool creation.
            pass
    return tool_instance
31
+
18
32
  # Case 1: @tool (function passed directly)
19
33
  if len(args) == 1 and callable(args[0]):
20
34
  f = args[0]
@@ -29,7 +43,8 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
29
43
  result = f(*f_args, **f_kwargs)
30
44
  return result
31
45
 
32
- return crewai_tool(tool_name, **crewai_kwargs)(wrapped)
46
+ tool_instance = crewai_tool(tool_name, **crewai_kwargs)(wrapped)
47
+ return _attach_metadata(tool_instance)
33
48
 
34
49
  # Case 2: @tool("name")
35
50
  if len(args) == 1 and isinstance(args[0], str):
@@ -45,7 +60,8 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
45
60
  result = f(*f_args, **f_kwargs)
46
61
  return result
47
62
 
48
- return crewai_tool(tool_name, **crewai_kwargs)(wrapped)
63
+ tool_instance = crewai_tool(tool_name, **crewai_kwargs)(wrapped)
64
+ return _attach_metadata(tool_instance)
49
65
 
50
66
  return _decorator
51
67
 
@@ -64,7 +80,8 @@ def tool(*args, metric=None, metric_collection=None, **kwargs) -> Callable:
64
80
  result = f(*f_args, **f_kwargs)
65
81
  return result
66
82
 
67
- return crewai_tool(tool_name, **crewai_kwargs)(wrapped)
83
+ tool_instance = crewai_tool(tool_name, **crewai_kwargs)(wrapped)
84
+ return _attach_metadata(tool_instance)
68
85
 
69
86
  return _decorator
70
87