deepeval 3.5.2__tar.gz → 3.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (472)
  1. {deepeval-3.5.2 → deepeval-3.5.4}/PKG-INFO +1 -1
  2. deepeval-3.5.4/deepeval/_version.py +1 -0
  3. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/config/settings.py +94 -2
  4. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/config/utils.py +54 -1
  5. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/constants.py +27 -0
  6. deepeval-3.5.4/deepeval/integrations/pydantic_ai/__init__.py +5 -0
  7. deepeval-3.5.4/deepeval/integrations/pydantic_ai/agent.py +339 -0
  8. deepeval-3.5.4/deepeval/integrations/pydantic_ai/patcher.py +484 -0
  9. deepeval-3.5.4/deepeval/integrations/pydantic_ai/utils.py +323 -0
  10. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
  11. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/non_advice/non_advice.py +2 -2
  12. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/pii_leakage/pii_leakage.py +2 -2
  13. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/embedding_models/azure_embedding_model.py +40 -9
  14. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/embedding_models/local_embedding_model.py +52 -9
  15. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
  16. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/embedding_models/openai_embedding_model.py +47 -5
  17. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/amazon_bedrock_model.py +31 -4
  18. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/anthropic_model.py +39 -13
  19. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/azure_model.py +37 -38
  20. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/deepseek_model.py +36 -7
  21. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/gemini_model.py +10 -0
  22. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/grok_model.py +50 -3
  23. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/kimi_model.py +37 -7
  24. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/local_model.py +38 -12
  25. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/ollama_model.py +15 -3
  26. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/openai_model.py +37 -44
  27. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/mlllms/gemini_model.py +21 -3
  28. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/mlllms/ollama_model.py +38 -13
  29. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/mlllms/openai_model.py +18 -42
  30. deepeval-3.5.4/deepeval/models/retry_policy.py +764 -0
  31. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/tracing.py +87 -0
  32. {deepeval-3.5.2 → deepeval-3.5.4}/pyproject.toml +1 -1
  33. deepeval-3.5.2/deepeval/_version.py +0 -1
  34. deepeval-3.5.2/deepeval/integrations/pydantic_ai/__init__.py +0 -3
  35. deepeval-3.5.2/deepeval/integrations/pydantic_ai/patcher.py +0 -411
  36. deepeval-3.5.2/deepeval/integrations/pydantic_ai/utils.py +0 -86
  37. deepeval-3.5.2/deepeval/models/retry_policy.py +0 -280
  38. {deepeval-3.5.2 → deepeval-3.5.4}/LICENSE.md +0 -0
  39. {deepeval-3.5.2 → deepeval-3.5.4}/README.md +0 -0
  40. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/__init__.py +0 -0
  41. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/annotation/__init__.py +0 -0
  42. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/annotation/annotation.py +0 -0
  43. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/annotation/api.py +0 -0
  44. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/__init__.py +0 -0
  45. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/arc/__init__.py +0 -0
  46. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/arc/arc.py +0 -0
  47. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/arc/mode.py +0 -0
  48. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/arc/template.py +0 -0
  49. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/base_benchmark.py +0 -0
  50. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/bbq/__init__.py +0 -0
  51. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/bbq/bbq.py +0 -0
  52. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/bbq/task.py +0 -0
  53. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/bbq/template.py +0 -0
  54. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
  55. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
  56. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
  57. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
  58. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
  59. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
  60. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
  61. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
  62. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
  63. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
  64. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
  65. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
  66. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
  67. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
  68. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
  69. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
  70. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
  71. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
  72. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
  73. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
  74. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
  75. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
  76. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
  77. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
  78. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
  79. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  80. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  81. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  82. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
  83. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
  84. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
  85. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
  86. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
  87. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
  88. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
  89. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
  90. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
  91. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
  92. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
  93. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
  94. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
  95. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
  96. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
  97. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
  98. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
  99. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
  100. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
  101. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
  102. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
  103. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
  104. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
  105. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
  106. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
  107. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  108. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  109. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  110. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
  111. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
  112. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
  113. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
  114. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/bool_q/__init__.py +0 -0
  115. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
  116. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/bool_q/template.py +0 -0
  117. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/drop/__init__.py +0 -0
  118. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/drop/drop.py +0 -0
  119. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/drop/task.py +0 -0
  120. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/drop/template.py +0 -0
  121. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
  122. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
  123. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
  124. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
  125. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
  126. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
  127. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/gsm8k/template.py +0 -0
  128. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
  129. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
  130. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/hellaswag/task.py +0 -0
  131. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/hellaswag/template.py +0 -0
  132. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/human_eval/__init__.py +0 -0
  133. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
  134. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/human_eval/task.py +0 -0
  135. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/human_eval/template.py +0 -0
  136. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/ifeval/__init__.py +0 -0
  137. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
  138. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/ifeval/template.py +0 -0
  139. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/lambada/__init__.py +0 -0
  140. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/lambada/lambada.py +0 -0
  141. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/lambada/template.py +0 -0
  142. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
  143. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
  144. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/logi_qa/task.py +0 -0
  145. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/logi_qa/template.py +0 -0
  146. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/math_qa/__init__.py +0 -0
  147. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
  148. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/math_qa/task.py +0 -0
  149. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/math_qa/template.py +0 -0
  150. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/mmlu/__init__.py +0 -0
  151. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
  152. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/mmlu/task.py +0 -0
  153. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/mmlu/template.py +0 -0
  154. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/modes/__init__.py +0 -0
  155. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/results.py +0 -0
  156. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/schema.py +0 -0
  157. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/squad/__init__.py +0 -0
  158. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/squad/squad.py +0 -0
  159. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/squad/task.py +0 -0
  160. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/squad/template.py +0 -0
  161. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/tasks/__init__.py +0 -0
  162. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
  163. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
  164. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/truthful_qa/task.py +0 -0
  165. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/truthful_qa/template.py +0 -0
  166. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
  167. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/utils.py +0 -0
  168. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/winogrande/__init__.py +0 -0
  169. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/winogrande/template.py +0 -0
  170. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
  171. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/cli/__init__.py +0 -0
  172. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/cli/dotenv_handler.py +0 -0
  173. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/cli/main.py +0 -0
  174. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/cli/server.py +0 -0
  175. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/cli/test.py +0 -0
  176. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/cli/types.py +0 -0
  177. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/cli/utils.py +0 -0
  178. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/confident/__init__.py +0 -0
  179. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/confident/api.py +0 -0
  180. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/confident/types.py +0 -0
  181. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/config/__init__.py +0 -0
  182. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/config/settings_manager.py +0 -0
  183. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/dataset/__init__.py +0 -0
  184. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/dataset/api.py +0 -0
  185. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/dataset/dataset.py +0 -0
  186. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/dataset/golden.py +0 -0
  187. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/dataset/test_run_tracer.py +0 -0
  188. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/dataset/types.py +0 -0
  189. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/dataset/utils.py +0 -0
  190. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/errors.py +0 -0
  191. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/evaluate/__init__.py +0 -0
  192. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/evaluate/api.py +0 -0
  193. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/evaluate/compare.py +0 -0
  194. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/evaluate/configs.py +0 -0
  195. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/evaluate/evaluate.py +0 -0
  196. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/evaluate/execute.py +0 -0
  197. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/evaluate/types.py +0 -0
  198. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/evaluate/utils.py +0 -0
  199. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/__init__.py +0 -0
  200. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/crewai/__init__.py +0 -0
  201. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/crewai/agent.py +0 -0
  202. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/crewai/handler.py +0 -0
  203. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/crewai/patch.py +0 -0
  204. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/hugging_face/__init__.py +0 -0
  205. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/hugging_face/callback.py +0 -0
  206. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
  207. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
  208. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/hugging_face/utils.py +0 -0
  209. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/langchain/__init__.py +0 -0
  210. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/langchain/callback.py +0 -0
  211. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/langchain/patch.py +0 -0
  212. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/langchain/utils.py +0 -0
  213. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/llama_index/__init__.py +0 -0
  214. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/llama_index/agent/patched.py +0 -0
  215. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/llama_index/handler.py +0 -0
  216. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/llama_index/utils.py +0 -0
  217. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/integrations/pydantic_ai/otel.py +0 -0
  218. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/key_handler.py +0 -0
  219. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/__init__.py +0 -0
  220. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
  221. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
  222. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/answer_relevancy/schema.py +0 -0
  223. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/answer_relevancy/template.py +0 -0
  224. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
  225. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
  226. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/arena_g_eval/schema.py +0 -0
  227. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/arena_g_eval/template.py +0 -0
  228. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/arena_g_eval/utils.py +0 -0
  229. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/argument_correctness/__init__.py +0 -0
  230. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
  231. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/argument_correctness/schema.py +0 -0
  232. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/argument_correctness/template.py +0 -0
  233. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/base_metric.py +0 -0
  234. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/bias/__init__.py +0 -0
  235. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/bias/bias.py +0 -0
  236. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/bias/schema.py +0 -0
  237. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/bias/template.py +0 -0
  238. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_precision/__init__.py +0 -0
  239. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
  240. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_precision/schema.py +0 -0
  241. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_precision/template.py +0 -0
  242. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_recall/__init__.py +0 -0
  243. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
  244. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_recall/schema.py +0 -0
  245. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_recall/template.py +0 -0
  246. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
  247. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
  248. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
  249. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/contextual_relevancy/template.py +0 -0
  250. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
  251. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
  252. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversation_completeness/schema.py +0 -0
  253. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversation_completeness/template.py +0 -0
  254. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversational_dag/__init__.py +0 -0
  255. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
  256. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversational_dag/nodes.py +0 -0
  257. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversational_dag/templates.py +0 -0
  258. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
  259. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
  260. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
  261. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/conversational_g_eval/template.py +0 -0
  262. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/dag/__init__.py +0 -0
  263. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/dag/dag.py +0 -0
  264. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/dag/graph.py +0 -0
  265. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/dag/nodes.py +0 -0
  266. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/dag/schema.py +0 -0
  267. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/dag/templates.py +0 -0
  268. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/dag/utils.py +0 -0
  269. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/faithfulness/__init__.py +0 -0
  270. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
  271. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/faithfulness/schema.py +0 -0
  272. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/faithfulness/template.py +0 -0
  273. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/g_eval/__init__.py +0 -0
  274. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/g_eval/g_eval.py +0 -0
  275. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/g_eval/schema.py +0 -0
  276. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/g_eval/template.py +0 -0
  277. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/g_eval/utils.py +0 -0
  278. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/hallucination/__init__.py +0 -0
  279. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/hallucination/hallucination.py +0 -0
  280. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/hallucination/schema.py +0 -0
  281. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/hallucination/template.py +0 -0
  282. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/indicator.py +0 -0
  283. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/json_correctness/__init__.py +0 -0
  284. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
  285. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/json_correctness/schema.py +0 -0
  286. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/json_correctness/template.py +0 -0
  287. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
  288. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
  289. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/knowledge_retention/schema.py +0 -0
  290. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/knowledge_retention/template.py +0 -0
  291. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/mcp/__init__.py +0 -0
  292. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
  293. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
  294. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/mcp/schema.py +0 -0
  295. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/mcp/template.py +0 -0
  296. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
  297. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
  298. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/mcp_use_metric/template.py +0 -0
  299. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/misuse/__init__.py +0 -0
  300. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/misuse/misuse.py +0 -0
  301. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/misuse/schema.py +0 -0
  302. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/misuse/template.py +0 -0
  303. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
  304. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
  305. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
  306. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
  307. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
  308. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
  309. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
  310. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
  311. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
  312. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
  313. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
  314. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
  315. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
  316. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
  317. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
  318. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
  319. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
  320. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
  321. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
  322. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
  323. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -0
  324. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
  325. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
  326. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
  327. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -0
  328. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
  329. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
  330. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
  331. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -0
  332. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
  333. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
  334. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
  335. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -0
  336. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
  337. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
  338. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
  339. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -0
  340. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
  341. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -0
  342. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
  343. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -0
  344. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
  345. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
  346. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
  347. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
  348. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
  349. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
  350. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
  351. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/non_advice/__init__.py +0 -0
  352. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/non_advice/schema.py +0 -0
  353. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/non_advice/template.py +0 -0
  354. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/pii_leakage/__init__.py +0 -0
  355. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/pii_leakage/schema.py +0 -0
  356. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/pii_leakage/template.py +0 -0
  357. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
  358. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
  359. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/prompt_alignment/schema.py +0 -0
  360. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/prompt_alignment/template.py +0 -0
  361. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/ragas.py +0 -0
  362. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/role_adherence/__init__.py +0 -0
  363. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
  364. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/role_adherence/schema.py +0 -0
  365. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/role_adherence/template.py +0 -0
  366. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/role_violation/__init__.py +0 -0
  367. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/role_violation/role_violation.py +0 -0
  368. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/role_violation/schema.py +0 -0
  369. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/role_violation/template.py +0 -0
  370. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/summarization/__init__.py +0 -0
  371. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/summarization/schema.py +0 -0
  372. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/summarization/summarization.py +0 -0
  373. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/summarization/template.py +0 -0
  374. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/task_completion/__init__.py +0 -0
  375. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/task_completion/schema.py +0 -0
  376. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/task_completion/task_completion.py +0 -0
  377. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/task_completion/template.py +0 -0
  378. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/tool_correctness/__init__.py +0 -0
  379. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
  380. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/toxicity/__init__.py +0 -0
  381. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/toxicity/schema.py +0 -0
  382. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/toxicity/template.py +0 -0
  383. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/toxicity/toxicity.py +0 -0
  384. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
  385. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/turn_relevancy/schema.py +0 -0
  386. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/turn_relevancy/template.py +0 -0
  387. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
  388. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/metrics/utils.py +0 -0
  389. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/__init__.py +0 -0
  390. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/_summac_model.py +0 -0
  391. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/answer_relevancy_model.py +0 -0
  392. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/base_model.py +0 -0
  393. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/detoxify_model.py +0 -0
  394. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/embedding_models/__init__.py +0 -0
  395. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/hallucination_model.py +0 -0
  396. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/__init__.py +0 -0
  397. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/litellm_model.py +0 -0
  398. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/llms/utils.py +0 -0
  399. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/mlllms/__init__.py +0 -0
  400. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/summac_model.py +0 -0
  401. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/unbias_model.py +0 -0
  402. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/models/utils.py +0 -0
  403. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/openai/__init__.py +0 -0
  404. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/openai/extractors.py +0 -0
  405. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/openai/patch.py +0 -0
  406. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/openai/utils.py +0 -0
  407. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/openai_agents/__init__.py +0 -0
  408. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/openai_agents/agent.py +0 -0
  409. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/openai_agents/callback_handler.py +0 -0
  410. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/openai_agents/extractors.py +0 -0
  411. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/openai_agents/patch.py +0 -0
  412. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/openai_agents/runner.py +0 -0
  413. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/plugins/__init__.py +0 -0
  414. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/plugins/plugin.py +0 -0
  415. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/progress_context.py +0 -0
  416. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/prompt/__init__.py +0 -0
  417. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/prompt/api.py +0 -0
  418. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/prompt/prompt.py +0 -0
  419. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/prompt/utils.py +0 -0
  420. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/py.typed +0 -0
  421. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/red_teaming/README.md +0 -0
  422. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/scorer/__init__.py +0 -0
  423. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/scorer/scorer.py +0 -0
  424. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/simulator/__init__.py +0 -0
  425. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/simulator/conversation_simulator.py +0 -0
  426. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/simulator/schema.py +0 -0
  427. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/simulator/template.py +0 -0
  428. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/singleton.py +0 -0
  429. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/__init__.py +0 -0
  430. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/base_synthesizer.py +0 -0
  431. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/chunking/__init__.py +0 -0
  432. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/chunking/context_generator.py +0 -0
  433. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
  434. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/config.py +0 -0
  435. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/schema.py +0 -0
  436. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/synthesizer.py +0 -0
  437. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/templates/__init__.py +0 -0
  438. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/templates/template.py +0 -0
  439. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/templates/template_extraction.py +0 -0
  440. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/templates/template_prompt.py +0 -0
  441. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/types.py +0 -0
  442. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/synthesizer/utils.py +0 -0
  443. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/telemetry.py +0 -0
  444. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_case/__init__.py +0 -0
  445. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_case/arena_test_case.py +0 -0
  446. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_case/conversational_test_case.py +0 -0
  447. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_case/llm_test_case.py +0 -0
  448. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_case/mcp.py +0 -0
  449. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_case/mllm_test_case.py +0 -0
  450. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_case/utils.py +0 -0
  451. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_run/__init__.py +0 -0
  452. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_run/api.py +0 -0
  453. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_run/cache.py +0 -0
  454. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_run/hooks.py +0 -0
  455. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_run/hyperparameters.py +0 -0
  456. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/test_run/test_run.py +0 -0
  457. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/__init__.py +0 -0
  458. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/api.py +0 -0
  459. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/context.py +0 -0
  460. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/offline_evals/__init__.py +0 -0
  461. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/offline_evals/api.py +0 -0
  462. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/offline_evals/span.py +0 -0
  463. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/offline_evals/thread.py +0 -0
  464. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/offline_evals/trace.py +0 -0
  465. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/otel/__init__.py +0 -0
  466. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/otel/exporter.py +0 -0
  467. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/otel/utils.py +0 -0
  468. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/patchers.py +0 -0
  469. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/perf_epoch_bridge.py +0 -0
  470. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/types.py +0 -0
  471. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/tracing/utils.py +0 -0
  472. {deepeval-3.5.2 → deepeval-3.5.4}/deepeval/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepeval
3
- Version: 3.5.2
3
+ Version: 3.5.4
4
4
  Summary: The LLM Evaluation Framework
5
5
  Home-page: https://github.com/confident-ai/deepeval
6
6
  License: Apache-2.0
@@ -0,0 +1 @@
1
+ __version__: str = "3.5.4"
@@ -9,6 +9,7 @@ Central config for DeepEval.
9
9
  type coercion.
10
10
  """
11
11
 
12
+ import logging
12
13
  import os
13
14
  import re
14
15
 
@@ -16,11 +17,17 @@ from dotenv import dotenv_values
16
17
  from pathlib import Path
17
18
  from pydantic import AnyUrl, SecretStr, field_validator, confloat
18
19
  from pydantic_settings import BaseSettings, SettingsConfigDict
19
- from typing import Any, Dict, Optional, NamedTuple
20
+ from typing import Any, Dict, List, Optional, NamedTuple
20
21
 
21
- from deepeval.config.utils import parse_bool
22
+ from deepeval.config.utils import (
23
+ parse_bool,
24
+ coerce_to_list,
25
+ dedupe_preserve_order,
26
+ )
27
+ from deepeval.constants import SUPPORTED_PROVIDER_SLUGS, slugify
22
28
 
23
29
 
30
+ logger = logging.getLogger(__name__)
24
31
  _SAVE_RE = re.compile(r"^(?P<scheme>dotenv)(?::(?P<path>.+))?$")
25
32
 
26
33
 
@@ -264,6 +271,13 @@ class Settings(BaseSettings):
264
271
  LOCAL_EMBEDDING_MODEL_NAME: Optional[str] = None
265
272
  LOCAL_EMBEDDING_BASE_URL: Optional[AnyUrl] = None
266
273
 
274
+ #
275
+ # Retry Policy
276
+ #
277
+ DEEPEVAL_SDK_RETRY_PROVIDERS: Optional[List[str]] = None
278
+ DEEPEVAL_RETRY_BEFORE_LOG_LEVEL: Optional[int] = None # default -> INFO
279
+ DEEPEVAL_RETRY_AFTER_LOG_LEVEL: Optional[int] = None # default -> ERROR
280
+
267
281
  #
268
282
  # Telemetry and Debug
269
283
  #
@@ -283,6 +297,12 @@ class Settings(BaseSettings):
283
297
  CONFIDENT_SAMPLE_RATE: Optional[float] = 1.0
284
298
  OTEL_EXPORTER_OTLP_ENDPOINT: Optional[AnyUrl] = None
285
299
 
300
+ #
301
+ # Network
302
+ #
303
+ MEDIA_IMAGE_CONNECT_TIMEOUT_SECONDS: float = 3.05
304
+ MEDIA_IMAGE_READ_TIMEOUT_SECONDS: float = 10.0
305
+
286
306
  ##############
287
307
  # Validators #
288
308
  ##############
@@ -401,6 +421,78 @@ class Settings(BaseSettings):
401
421
  return None
402
422
  return s.upper()
403
423
 
424
@field_validator("DEEPEVAL_SDK_RETRY_PROVIDERS", mode="before")
@classmethod
def _coerce_to_list(cls, v):
    """Turn the raw DEEPEVAL_SDK_RETRY_PROVIDERS value into a list.

    Runs before pydantic's own type validation and delegates to
    ``coerce_to_list`` with ``lower=True``, so a JSON array, a
    comma/space/semicolon separated string, or an actual list are all
    accepted and every entry comes back lowercased.
    """
    providers = coerce_to_list(v, lower=True)
    return providers
429
+
430
@field_validator("DEEPEVAL_SDK_RETRY_PROVIDERS", mode="after")
@classmethod
def _validate_sdk_provider_list(cls, v):
    """Normalize and filter the configured SDK-retry provider slugs.

    Args:
        v: The already-coerced list of provider names, or ``None``.

    Returns:
        ``None`` when nothing usable is configured, ``["*"]`` when the
        wildcard appears anywhere in the list, otherwise the supported
        slugs in first-seen order without duplicates.
    """
    if v is None:
        return None

    # Read the verbose flag defensively: a plain ``cls.<FIELD>`` lookup
    # raises AttributeError when the name is a declared pydantic v2 field,
    # which would turn "drop an unknown slug" into a crash.
    # NOTE(review): if DEEPEVAL_VERBOSE_MODE is a declared field this is
    # always None here, so the warning below never fires -- confirm how the
    # verbose flag is meant to be read inside a validator.
    verbose = getattr(cls, "DEEPEVAL_VERBOSE_MODE", None)

    normalized: list[str] = []
    star = False

    for item in v:
        s = str(item).strip()
        if not s:
            continue
        if s == "*":
            # Keep scanning so unknown entries are still reported.
            star = True
            continue
        s = slugify(s)
        if s in SUPPORTED_PROVIDER_SLUGS:
            normalized.append(s)
        elif verbose:
            logger.warning("Unknown provider slug %r dropped", item)

    if star:
        return ["*"]

    # It is important to dedup after normalization to catch variants
    normalized = dedupe_preserve_order(normalized)
    return normalized or None
459
+
460
@field_validator(
    "DEEPEVAL_RETRY_BEFORE_LOG_LEVEL",
    "DEEPEVAL_RETRY_AFTER_LOG_LEVEL",
    mode="before",
)
@classmethod
def _coerce_log_level(cls, v):
    """Convert a retry log-level setting into a numeric logging level.

    Accepts ``None`` or a blank string (both yield ``None``), an int or
    float (truncated with ``int``), a numeric string with an optional
    leading minus sign, or one of the standard level names in any case.

    Raises:
        ValueError: If the value is none of the above.
    """
    if v is None:
        return None
    if isinstance(v, (int, float)):
        return int(v)

    text = str(v).strip().upper()
    if not text:
        return None

    # Numeric strings, optionally negative, are taken at face value.
    unsigned = text[1:] if text.startswith("-") else text
    if unsigned.isdigit():
        return int(text)

    levels_by_name = {
        "CRITICAL": logging.CRITICAL,
        "ERROR": logging.ERROR,
        "WARNING": logging.WARNING,
        "INFO": logging.INFO,
        "DEBUG": logging.DEBUG,
        "NOTSET": logging.NOTSET,
    }
    level = levels_by_name.get(text)
    # NOTSET maps to 0, so test for a missing key rather than falsiness.
    if level is None:
        raise ValueError(
            "Retry log level must be one of DEBUG, INFO, WARNING, ERROR, "
            "CRITICAL, NOTSET, or a numeric logging level."
        )
    return level
495
+
404
496
  #######################
405
497
  # Persistence support #
406
498
  #######################
@@ -1,8 +1,13 @@
1
+ import json
1
2
  import os
2
- from typing import Any, Optional
3
+ import re
4
+
5
+ from typing import Any, Iterable, List, Optional
6
+
3
7
 
4
8
  _TRUTHY = frozenset({"1", "true", "t", "yes", "y", "on", "enable", "enabled"})
5
9
  _FALSY = frozenset({"0", "false", "f", "no", "n", "off", "disable", "disabled"})
10
+ _LIST_SEP_RE = re.compile(r"[,\s;]+")
6
11
 
7
12
 
8
13
  def parse_bool(value: Any, default: bool = False) -> bool:
@@ -84,3 +89,51 @@ def set_env_bool(key: str, value: Optional[bool] = False) -> None:
84
89
  - Use `get_env_bool` to read back and parse the value safely.
85
90
  """
86
91
  os.environ[key] = bool_to_env_str(bool(value))
92
+
93
+
94
def coerce_to_list(
    v,
    *,
    lower: bool = False,
    allow_json: bool = True,
    sep_re: re.Pattern = _LIST_SEP_RE,
) -> Optional[List[str]]:
    """
    Coerce None / str / list / tuple / set / frozenset into a clean List[str].

    - Accepts JSON arrays ("[...]") or delimited strings
      (comma/space/semicolon by default).
    - Strips whitespace, drops empties and ``None`` items, optionally
      lowercases.

    Args:
        v: The raw value. Anything that is not a list/tuple/set/frozenset is
            converted with ``str`` and parsed as text.
        lower: Lowercase every returned item.
        allow_json: Try ``json.loads`` first when the text looks like a JSON
            array; on failure the text is split with ``sep_re`` instead.
        sep_re: Compiled pattern used to split delimited strings.

    Returns:
        The cleaned list, or ``None`` when the input is ``None``, blank, or
        contains no non-empty items.
    """
    if v is None:
        return None

    if isinstance(v, (list, tuple, set, frozenset)):
        items = list(v)
    else:
        text = str(v).strip()
        if not text:
            return None

        parsed = None
        if allow_json and text.startswith("[") and text.endswith("]"):
            try:
                parsed = json.loads(text)
            except (ValueError, RecursionError):
                # Not valid JSON (or absurdly nested): treat it as a plain
                # delimited string below.
                parsed = None
        items = parsed if isinstance(parsed, list) else sep_re.split(text)

    out: List[str] = []
    for item in items:
        # A JSON ``null`` / Python ``None`` element carries no value; do not
        # let it turn into the literal string "None".
        if item is None:
            continue
        cleaned = str(item).strip()
        if not cleaned:
            continue
        out.append(cleaned.lower() if lower else cleaned)
    return out or None
130
+
131
+
132
def dedupe_preserve_order(items: Iterable[str]) -> List[str]:
    """Return the distinct elements of ``items`` in first-seen order."""
    # dict keys are unique and remember insertion order, so building a dict
    # keeps exactly the first occurrence of every element.
    return list(dict.fromkeys(items))
@@ -1,3 +1,5 @@
1
+ from enum import Enum
2
+
1
3
  KEY_FILE: str = ".deepeval"
2
4
  HIDDEN_DIR: str = ".deepeval"
3
5
  PYTEST_RUN_TEST_NAME: str = "CONFIDENT_AI_RUN_TEST_NAME"
@@ -11,3 +13,28 @@ CONFIDENT_TRACE_ENVIRONMENT = "CONFIDENT_TRACE_ENVIRONMENT"
11
13
  CONFIDENT_TRACING_ENABLED = "CONFIDENT_TRACING_ENABLED"
12
14
  CONFIDENT_OPEN_BROWSER = "CONFIDENT_OPEN_BROWSER"
13
15
  CONFIDENT_TEST_CASE_BATCH_SIZE = "CONFIDENT_TEST_CASE_BATCH_SIZE"
16
+
17
+
18
class ProviderSlug(str, Enum):
    """Canonical lowercase identifiers for the recognised model providers.

    Members are also ``str`` instances, so they compare equal to their
    plain-string value.
    """

    OPENAI = "openai"
    AZURE = "azure"
    ANTHROPIC = "anthropic"
    BEDROCK = "bedrock"
    DEEPSEEK = "deepseek"
    GOOGLE = "google"
    GROK = "grok"
    KIMI = "kimi"
    LITELLM = "litellm"
    LOCAL = "local"
    OLLAMA = "ollama"


# The annotation is quoted on purpose: an unquoted ``str | ProviderSlug`` is
# evaluated when the function is defined and raises TypeError on Python
# versions older than 3.10, which would make this module unimportable there.
def slugify(value: "str | ProviderSlug") -> str:
    """Return the normalized slug for a provider name or enum member.

    Args:
        value: A ``ProviderSlug`` member or any value convertible with
            ``str``.

    Returns:
        The member's value for a ``ProviderSlug``; otherwise the string form
        of ``value`` with surrounding whitespace removed and lowercased.
    """
    if isinstance(value, ProviderSlug):
        return value.value
    return str(value).strip().lower()


# Immutable set of every known slug, for fast membership checks.
SUPPORTED_PROVIDER_SLUGS = frozenset(s.value for s in ProviderSlug)
@@ -0,0 +1,5 @@
1
from .agent import DeepEvalPydanticAIAgent as Agent
from .patcher import instrument as instrument_pydantic_ai
from .otel import instrument_pydantic_ai as otel_instrument_pydantic_ai

# ``__all__`` must list the exported *names* as strings. Putting the imported
# object itself in the list makes ``from <package> import *`` fail with a
# TypeError.
__all__ = ["instrument_pydantic_ai", "Agent", "otel_instrument_pydantic_ai"]
@@ -0,0 +1,339 @@
1
+ import inspect
2
+ from typing import Optional, List, Generic, TypeVar
3
+ from contextvars import ContextVar
4
+ from contextlib import asynccontextmanager
5
+
6
+ from deepeval.prompt import Prompt
7
+ from deepeval.tracing.types import AgentSpan
8
+ from deepeval.tracing.tracing import Observer
9
+ from deepeval.metrics.base_metric import BaseMetric
10
+ from deepeval.tracing.context import current_span_context
11
+ from deepeval.integrations.pydantic_ai.utils import extract_tools_called
12
+
13
# Optional dependency guard: record whether Pydantic AI (and the helpers that
# depend on it) could be imported.
try:
    from pydantic_ai.agent import Agent
    from pydantic_ai.tools import AgentDepsT
    from pydantic_ai.output import OutputDataT
    from deepeval.integrations.pydantic_ai.utils import (
        create_patched_tool,
        update_trace_context,
        patch_llm_model,
    )

    is_pydantic_ai_installed = True
except ImportError:
    # Only a failed import means "not installed"; any other error raised
    # while importing is a real problem and is allowed to propagate.
    is_pydantic_ai_installed = False

    # Placeholders so the class statement further down can still be
    # evaluated. Without them this module dies with a NameError at import
    # time and the explanatory ImportError raised by
    # ``pydantic_ai_installed()`` can never be reached.
    AgentDepsT = TypeVar("AgentDepsT")
    OutputDataT = TypeVar("OutputDataT")

    class Agent(Generic[AgentDepsT, OutputDataT]):
        """Stand-in base used only when Pydantic AI cannot be imported."""
26
+
27
+
28
def pydantic_ai_installed():
    """Raise ``ImportError`` when the optional Pydantic AI imports failed.

    Returns ``None`` without side effects when the imports succeeded.
    """
    if is_pydantic_ai_installed:
        return
    raise ImportError(
        "Pydantic AI is not installed. Please install it with `pip install pydantic-ai`."
    )
33
+
34
+
35
+ _IS_RUN_SYNC = ContextVar("deepeval_is_run_sync", default=False)
36
+
37
+
38
class DeepEvalPydanticAIAgent(
    Agent[AgentDepsT, OutputDataT], Generic[AgentDepsT, OutputDataT]
):
    """Pydantic AI ``Agent`` whose run methods are wrapped in DeepEval spans.

    ``run``, ``run_sync`` and ``run_stream`` each open an ``Observer`` span
    around the parent implementation and then publish trace attributes via
    ``update_trace_context``. Trace attributes passed to the constructor act
    as defaults; the matching keyword on an individual call overrides the
    default whenever it is not ``None``.
    """

    # Trace-level defaults, overridable per call.
    trace_name: Optional[str] = None
    trace_tags: Optional[List[str]] = None
    trace_metadata: Optional[dict] = None
    trace_thread_id: Optional[str] = None
    trace_user_id: Optional[str] = None
    trace_metric_collection: Optional[str] = None
    trace_metrics: Optional[List[BaseMetric]] = None

    # Settings handed to ``patch_llm_model`` for the agent's model.
    llm_prompt: Optional[Prompt] = None
    llm_metrics: Optional[List[BaseMetric]] = None
    llm_metric_collection: Optional[str] = None

    # Metrics attached to the agent span itself.
    agent_metrics: Optional[List[BaseMetric]] = None
    agent_metric_collection: Optional[str] = None

    def __init__(
        self,
        *args,
        trace_name: Optional[str] = None,
        trace_tags: Optional[List[str]] = None,
        trace_metadata: Optional[dict] = None,
        trace_thread_id: Optional[str] = None,
        trace_user_id: Optional[str] = None,
        trace_metric_collection: Optional[str] = None,
        trace_metrics: Optional[List[BaseMetric]] = None,
        llm_metric_collection: Optional[str] = None,
        llm_metrics: Optional[List[BaseMetric]] = None,
        llm_prompt: Optional[Prompt] = None,
        agent_metric_collection: Optional[str] = None,
        agent_metrics: Optional[List[BaseMetric]] = None,
        **kwargs
    ):
        """Store the DeepEval settings, then build the underlying agent.

        Positional arguments and any keyword not listed here are forwarded
        unchanged to the parent ``Agent`` constructor.

        Raises:
            ImportError: If Pydantic AI could not be imported.
        """
        pydantic_ai_installed()

        self.trace_name = trace_name
        self.trace_tags = trace_tags
        self.trace_metadata = trace_metadata
        self.trace_thread_id = trace_thread_id
        self.trace_user_id = trace_user_id
        self.trace_metric_collection = trace_metric_collection
        self.trace_metrics = trace_metrics

        self.llm_metric_collection = llm_metric_collection
        self.llm_metrics = llm_metrics
        self.llm_prompt = llm_prompt

        self.agent_metric_collection = agent_metric_collection
        self.agent_metrics = agent_metrics

        super().__init__(*args, **kwargs)

        patch_llm_model(
            self._model, llm_metric_collection, llm_metrics, llm_prompt
        )  # TODO: Add dual patch guards

    @staticmethod
    def _deepeval_user_prompt(method, args, kwargs):
        """Return the ``user_prompt`` argument ``method`` would receive."""
        bound = inspect.signature(method).bind_partial(*args, **kwargs)
        bound.apply_defaults()
        return bound.arguments.get("user_prompt", None)

    def _deepeval_trace_fields(
        self,
        name,
        tags,
        metadata,
        thread_id,
        user_id,
        metric_collection,
        metrics,
    ) -> dict:
        """Resolve per-call trace overrides against the agent defaults."""
        candidates = {
            "trace_name": (name, self.trace_name),
            "trace_tags": (tags, self.trace_tags),
            "trace_metadata": (metadata, self.trace_metadata),
            "trace_thread_id": (thread_id, self.trace_thread_id),
            "trace_user_id": (user_id, self.trace_user_id),
            "trace_metric_collection": (
                metric_collection,
                self.trace_metric_collection,
            ),
            "trace_metrics": (metrics, self.trace_metrics),
        }
        return {
            key: override if override is not None else default
            for key, (override, default) in candidates.items()
        }

    @staticmethod
    def _deepeval_record_tools(result):
        """Best-effort: copy the tools called by ``result`` onto the span."""
        agent_span: AgentSpan = current_span_context.get()
        if result is None:
            return
        try:
            agent_span.tools_called = extract_tools_called(result)
        except Exception:
            # Tool extraction is diagnostic only and must never fail a run.
            # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
            # and SystemExit are not swallowed.
            pass

    async def run(
        self,
        *args,
        name: Optional[str] = None,
        tags: Optional[List[str]] = None,
        user_id: Optional[str] = None,
        metadata: Optional[dict] = None,
        thread_id: Optional[str] = None,
        metrics: Optional[List[BaseMetric]] = None,
        metric_collection: Optional[str] = None,
        **kwargs
    ):
        """Run the agent asynchronously inside a DeepEval span.

        The keyword-only parameters override the trace defaults for this
        call; everything else is forwarded to the parent ``run``. While the
        flag set by ``run_sync`` is active the span is recorded as a
        ``"custom"`` span named ``"run"`` without agent metrics (presumably
        because the parent ``run_sync`` re-enters ``run`` and the agent span
        is already open).

        Returns:
            The result object returned by the parent ``run``.
        """
        user_prompt = self._deepeval_user_prompt(super().run, args, kwargs)
        agent_name = super().name if super().name is not None else "Agent"
        inside_run_sync = _IS_RUN_SYNC.get()

        with Observer(
            span_type="custom" if inside_run_sync else "agent",
            func_name="run" if inside_run_sync else agent_name,
            function_kwargs={"input": user_prompt},
            metrics=None if inside_run_sync else self.agent_metrics,
            metric_collection=(
                None if inside_run_sync else self.agent_metric_collection
            ),
        ) as observer:
            result = await super().run(*args, **kwargs)
            observer.result = result.output
            update_trace_context(
                trace_input=user_prompt,
                trace_output=result.output,
                **self._deepeval_trace_fields(
                    name=name,
                    tags=tags,
                    metadata=metadata,
                    thread_id=thread_id,
                    user_id=user_id,
                    metric_collection=metric_collection,
                    metrics=metrics,
                ),
            )
            self._deepeval_record_tools(result)
            # TODO: available tools
            # TODO: agent handoffs

        return result

    def run_sync(
        self,
        *args,
        name: Optional[str] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[dict] = None,
        thread_id: Optional[str] = None,
        user_id: Optional[str] = None,
        metric_collection: Optional[str] = None,
        metrics: Optional[List[BaseMetric]] = None,
        **kwargs
    ):
        """Run the agent synchronously inside a DeepEval agent span.

        The keyword-only parameters override the trace defaults for this
        call; everything else is forwarded to the parent ``run_sync``.

        Returns:
            The result object returned by the parent ``run_sync``.
        """
        user_prompt = self._deepeval_user_prompt(
            super().run_sync, args, kwargs
        )
        agent_name = super().name if super().name is not None else "Agent"

        with Observer(
            span_type="agent",
            func_name=agent_name,
            function_kwargs={"input": user_prompt},
            metrics=self.agent_metrics,
            metric_collection=self.agent_metric_collection,
        ) as observer:
            # Raise the flag only once the span is open and pair it directly
            # with the ``finally`` below, so it can never stay set if opening
            # the span fails.
            token = _IS_RUN_SYNC.set(True)
            try:
                result = super().run_sync(*args, **kwargs)
            finally:
                _IS_RUN_SYNC.reset(token)

            observer.result = result.output
            update_trace_context(
                trace_input=user_prompt,
                trace_output=result.output,
                **self._deepeval_trace_fields(
                    name=name,
                    tags=tags,
                    metadata=metadata,
                    thread_id=thread_id,
                    user_id=user_id,
                    metric_collection=metric_collection,
                    metrics=metrics,
                ),
            )
            self._deepeval_record_tools(result)

            # TODO: available tools
            # TODO: agent handoffs

        return result

    @asynccontextmanager
    async def run_stream(
        self,
        *args,
        name: Optional[str] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[dict] = None,
        thread_id: Optional[str] = None,
        user_id: Optional[str] = None,
        metric_collection: Optional[str] = None,
        metrics: Optional[List[BaseMetric]] = None,
        **kwargs
    ):
        """Stream a run inside a DeepEval agent span.

        Only keyword arguments that appear in the parent ``run_stream``
        signature are forwarded; any other keyword is silently dropped.
        The streamed result is yielded to the caller, and when the caller's
        block exits (normally or through an exception) the final output is
        fetched on a best-effort basis and the trace attributes are
        published.
        """
        parent_params = inspect.signature(super().run_stream).parameters
        super_kwargs = {
            key: value for key, value in kwargs.items() if key in parent_params
        }
        user_prompt = self._deepeval_user_prompt(
            super().run_stream, args, super_kwargs
        )
        agent_name = super().name if super().name is not None else "Agent"

        with Observer(
            span_type="agent",
            func_name=agent_name,
            function_kwargs={"input": user_prompt},
            metrics=self.agent_metrics,
            metric_collection=self.agent_metric_collection,
        ) as observer:
            final_result = None
            async with super().run_stream(*args, **super_kwargs) as result:
                try:
                    yield result
                finally:
                    try:
                        final_result = await result.get_output()
                        observer.result = final_result
                    except Exception:
                        # The stream may have been abandoned or failed;
                        # the trace is then published without an output.
                        pass

                    update_trace_context(
                        trace_input=user_prompt,
                        trace_output=final_result,
                        **self._deepeval_trace_fields(
                            name=name,
                            tags=tags,
                            metadata=metadata,
                            thread_id=thread_id,
                            user_id=user_id,
                            metric_collection=metric_collection,
                            metrics=metrics,
                        ),
                    )
                    # NOTE(review): this passes the final *output*, whereas
                    # run()/run_sync() pass the result object itself --
                    # confirm what extract_tools_called expects here.
                    self._deepeval_record_tools(final_result)

    def tool(
        self,
        *args,
        metrics: Optional[List[BaseMetric]] = None,
        metric_collection: Optional[str] = None,
        **kwargs
    ):
        """Register a tool after wrapping it with ``create_patched_tool``.

        Works both as ``@agent.tool`` and as ``@agent.tool(...)``.
        ``metrics`` and ``metric_collection`` are passed to
        ``create_patched_tool``; all remaining arguments go to the parent
        ``tool`` decorator.
        """
        # Explicit two-argument super(): the zero-argument form cannot be
        # used inside the nested ``decorator`` function below.
        super_tool = super(DeepEvalPydanticAIAgent, self).tool

        # Direct decoration: @agent.tool
        if args and callable(args[0]):
            patched_func = create_patched_tool(
                args[0], metrics, metric_collection
            )
            return super_tool(patched_func, *args[1:], **kwargs)

        # Decoration with args: @agent.tool(...)
        def decorator(func):
            patched_func = create_patched_tool(func, metrics, metric_collection)
            return super_tool(*args, **kwargs)(patched_func)

        return decorator