deepeval 3.5.9__tar.gz → 3.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.5.9 → deepeval-3.6.0}/PKG-INFO +1 -1
- deepeval-3.6.0/deepeval/_version.py +1 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/config/settings_manager.py +1 -1
- deepeval-3.6.0/deepeval/contextvars.py +25 -0
- deepeval-3.6.0/deepeval/dataset/__init__.py +11 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/execute.py +15 -3
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai_agents/__init__.py +4 -3
- deepeval-3.6.0/deepeval/openai_agents/agent.py +36 -0
- deepeval-3.6.0/deepeval/openai_agents/callback_handler.py +135 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai_agents/extractors.py +83 -7
- deepeval-3.6.0/deepeval/openai_agents/patch.py +309 -0
- deepeval-3.6.0/deepeval/openai_agents/runner.py +348 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/context.py +1 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/tracing.py +3 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/utils.py +4 -3
- {deepeval-3.5.9 → deepeval-3.6.0}/pyproject.toml +1 -1
- deepeval-3.5.9/deepeval/_version.py +0 -1
- deepeval-3.5.9/deepeval/dataset/__init__.py +0 -5
- deepeval-3.5.9/deepeval/openai_agents/agent.py +0 -194
- deepeval-3.5.9/deepeval/openai_agents/callback_handler.py +0 -134
- deepeval-3.5.9/deepeval/openai_agents/patch.py +0 -115
- deepeval-3.5.9/deepeval/openai_agents/runner.py +0 -335
- {deepeval-3.5.9 → deepeval-3.6.0}/LICENSE.md +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/README.md +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/annotation/annotation.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/annotation/api.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/drop/drop.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/dotenv_handler.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/main.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/server.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/test.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/types.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/cli/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/confident/api.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/confident/types.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/config/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/config/settings.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/config/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/constants.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/api.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/dataset.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/golden.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/test_run_tracer.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/types.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/errors.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/configs.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/evaluate.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/evaluate/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/crewai/agent.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/crewai/patch.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/langchain/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/langchain/callback.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/langchain/patch.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/langchain/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/llama_index/agent/patched.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/pydantic_ai/agent.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/pydantic_ai/instrumentator.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/integrations/pydantic_ai/otel.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/key_handler.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/answer_relevancy/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/argument_correctness/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/base_metric.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/bias/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_precision/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_recall/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/contextual_relevancy/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversation_completeness/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/nodes.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_dag/templates.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/conversational_g_eval/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/templates.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/faithfulness/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/faithfulness/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/g_eval/g_eval.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/g_eval/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/hallucination/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/indicator.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/misuse/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/non_advice/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/pii_leakage/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/prompt_alignment/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/ragas.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_adherence/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/role_violation/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/task_completion/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/tool_correctness/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/toxicity/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/metrics/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/base_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/embedding_models/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/anthropic_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/azure_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/deepseek_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/gemini_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/grok_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/kimi_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/litellm_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/local_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/ollama_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/openai_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/llms/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/mlllms/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/mlllms/gemini_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/mlllms/ollama_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/mlllms/openai_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/retry_policy.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/models/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai/patch.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/openai/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/plugins/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/progress_context.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/prompt/api.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/prompt/prompt.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/prompt/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/py.typed +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/scorer/scorer.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/simulator/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/singleton.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/chunking/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/synthesizer.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/telemetry.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/conversational_test_case.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/llm_test_case.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/mllm_test_case.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_case/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/api.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/cache.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/test_run/test_run.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/api.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/otel/exporter.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/otel/utils.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/patchers.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/types.py +0 -0
- {deepeval-3.5.9 → deepeval-3.6.0}/deepeval/tracing/utils.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__: str = "3.6.0"
|
|
@@ -15,7 +15,7 @@ from enum import Enum
|
|
|
15
15
|
from pydantic import SecretStr
|
|
16
16
|
from deepeval.config.settings import get_settings, _SAVE_RE
|
|
17
17
|
from deepeval.cli.dotenv_handler import DotenvHandler
|
|
18
|
-
from deepeval.utils import bool_to_env_str
|
|
18
|
+
from deepeval.config.utils import bool_to_env_str
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
StrOrEnum = Union[str, Enum]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from contextvars import ContextVar
|
|
4
|
+
from typing import TYPE_CHECKING, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from deepeval.dataset.golden import Golden
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
CURRENT_GOLDEN: ContextVar[Optional[Golden]] = ContextVar(
|
|
12
|
+
"CURRENT_GOLDEN", default=None
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def set_current_golden(golden: Optional[Golden]):
|
|
17
|
+
return CURRENT_GOLDEN.set(golden)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_current_golden() -> Optional[Golden]:
|
|
21
|
+
return CURRENT_GOLDEN.get()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def reset_current_golden(token) -> None:
|
|
25
|
+
CURRENT_GOLDEN.reset(token)
|
|
@@ -42,6 +42,7 @@ from deepeval.tracing.api import (
|
|
|
42
42
|
BaseApiSpan,
|
|
43
43
|
)
|
|
44
44
|
from deepeval.dataset import Golden
|
|
45
|
+
from deepeval.contextvars import set_current_golden, reset_current_golden
|
|
45
46
|
from deepeval.errors import MissingTestCaseParamsError
|
|
46
47
|
from deepeval.metrics.utils import copy_metrics
|
|
47
48
|
from deepeval.utils import (
|
|
@@ -1480,6 +1481,7 @@ def execute_agentic_test_cases_from_loop(
|
|
|
1480
1481
|
)
|
|
1481
1482
|
|
|
1482
1483
|
for golden in goldens:
|
|
1484
|
+
token = set_current_golden(golden)
|
|
1483
1485
|
with capture_evaluation_run("golden"):
|
|
1484
1486
|
# yield golden
|
|
1485
1487
|
count += 1
|
|
@@ -1492,8 +1494,14 @@ def execute_agentic_test_cases_from_loop(
|
|
|
1492
1494
|
_progress=progress,
|
|
1493
1495
|
_pbar_callback_id=pbar_tags_id,
|
|
1494
1496
|
):
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
+
try:
|
|
1498
|
+
# yield golden to user code
|
|
1499
|
+
yield golden
|
|
1500
|
+
# control has returned from user code without error, capture trace now
|
|
1501
|
+
current_trace: Trace = current_trace_context.get()
|
|
1502
|
+
finally:
|
|
1503
|
+
# after user code returns control, always reset the context
|
|
1504
|
+
reset_current_golden(token)
|
|
1497
1505
|
|
|
1498
1506
|
update_pbar(progress, pbar_tags_id)
|
|
1499
1507
|
update_pbar(progress, pbar_id)
|
|
@@ -1849,6 +1857,7 @@ def a_execute_agentic_test_cases_from_loop(
|
|
|
1849
1857
|
|
|
1850
1858
|
try:
|
|
1851
1859
|
for index, golden in enumerate(goldens):
|
|
1860
|
+
token = set_current_golden(golden)
|
|
1852
1861
|
current_golden_ctx.update(
|
|
1853
1862
|
{
|
|
1854
1863
|
"index": index,
|
|
@@ -1857,7 +1866,10 @@ def a_execute_agentic_test_cases_from_loop(
|
|
|
1857
1866
|
}
|
|
1858
1867
|
)
|
|
1859
1868
|
prev_task_length = len(created_tasks)
|
|
1860
|
-
|
|
1869
|
+
try:
|
|
1870
|
+
yield golden
|
|
1871
|
+
finally:
|
|
1872
|
+
reset_current_golden(token)
|
|
1861
1873
|
# if this golden created no tasks, bump bars now
|
|
1862
1874
|
if len(created_tasks) == prev_task_length:
|
|
1863
1875
|
update_pbar(progress, pbar_callback_id)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from deepeval.openai_agents.callback_handler import DeepEvalTracingProcessor
|
|
2
|
-
from deepeval.openai_agents.runner import Runner
|
|
3
|
-
from deepeval.openai_agents.patch import function_tool
|
|
4
2
|
from deepeval.openai_agents.agent import DeepEvalAgent as Agent
|
|
3
|
+
from deepeval.openai_agents.patch import function_tool
|
|
4
|
+
|
|
5
|
+
# from deepeval.openai_agents.runner import Runner
|
|
5
6
|
|
|
6
|
-
__all__ = ["DeepEvalTracingProcessor", "
|
|
7
|
+
__all__ = ["DeepEvalTracingProcessor", "Agent", "function_tool"]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Generic, TypeVar, List
|
|
5
|
+
|
|
6
|
+
from deepeval.prompt import Prompt
|
|
7
|
+
from deepeval.metrics import BaseMetric
|
|
8
|
+
from deepeval.tracing.types import LlmSpan
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from agents.agent import Agent as BaseAgent
|
|
12
|
+
from deepeval.openai_agents.patch import (
|
|
13
|
+
patch_default_agent_runner_get_model,
|
|
14
|
+
)
|
|
15
|
+
except Exception as e:
|
|
16
|
+
raise RuntimeError(
|
|
17
|
+
"openai-agents is required for this integration. Please install it."
|
|
18
|
+
) from e
|
|
19
|
+
|
|
20
|
+
TContext = TypeVar("TContext")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
|
|
24
|
+
class DeepEvalAgent(BaseAgent[TContext], Generic[TContext]):
|
|
25
|
+
"""
|
|
26
|
+
A subclass of agents.Agent.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
llm_metric_collection: str = None
|
|
30
|
+
llm_metrics: List[BaseMetric] = None
|
|
31
|
+
confident_prompt: Prompt = None
|
|
32
|
+
agent_metrics: List[BaseMetric] = None
|
|
33
|
+
agent_metric_collection: str = None
|
|
34
|
+
|
|
35
|
+
def __post_init__(self):
|
|
36
|
+
patch_default_agent_runner_get_model()
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
from deepeval.tracing.tracing import (
|
|
2
|
+
Observer,
|
|
3
|
+
current_span_context,
|
|
4
|
+
trace_manager,
|
|
5
|
+
)
|
|
6
|
+
from deepeval.openai_agents.extractors import *
|
|
7
|
+
from deepeval.tracing.context import current_trace_context
|
|
8
|
+
from deepeval.tracing.utils import make_json_serializable
|
|
9
|
+
from time import perf_counter
|
|
10
|
+
from deepeval.tracing.types import TraceSpanStatus
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from agents.tracing import Span, Trace, TracingProcessor
|
|
14
|
+
from agents.tracing.span_data import (
|
|
15
|
+
AgentSpanData,
|
|
16
|
+
CustomSpanData,
|
|
17
|
+
FunctionSpanData,
|
|
18
|
+
GenerationSpanData,
|
|
19
|
+
GuardrailSpanData,
|
|
20
|
+
HandoffSpanData,
|
|
21
|
+
ResponseSpanData,
|
|
22
|
+
SpanData,
|
|
23
|
+
)
|
|
24
|
+
from deepeval.openai_agents.patch import (
|
|
25
|
+
patch_default_agent_run_single_turn,
|
|
26
|
+
patch_default_agent_run_single_turn_streamed,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
openai_agents_available = True
|
|
30
|
+
except ImportError:
|
|
31
|
+
openai_agents_available = False
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _check_openai_agents_available():
|
|
35
|
+
if not openai_agents_available:
|
|
36
|
+
raise ImportError(
|
|
37
|
+
"openai-agents is required for this integration. Install it via your package manager"
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class DeepEvalTracingProcessor(TracingProcessor):
|
|
42
|
+
def __init__(self) -> None:
|
|
43
|
+
_check_openai_agents_available()
|
|
44
|
+
patch_default_agent_run_single_turn()
|
|
45
|
+
patch_default_agent_run_single_turn_streamed()
|
|
46
|
+
self.span_observers: dict[str, Observer] = {}
|
|
47
|
+
|
|
48
|
+
def on_trace_start(self, trace: "Trace") -> None:
|
|
49
|
+
trace_dict = trace.export()
|
|
50
|
+
_trace_uuid = trace_dict.get("id")
|
|
51
|
+
_thread_id = trace_dict.get("group_id")
|
|
52
|
+
_trace_name = trace_dict.get("workflow_name")
|
|
53
|
+
_trace_metadata = trace_dict.get("metadata")
|
|
54
|
+
|
|
55
|
+
_trace = trace_manager.start_new_trace(trace_uuid=str(_trace_uuid))
|
|
56
|
+
_trace.thread_id = str(_thread_id)
|
|
57
|
+
_trace.name = str(_trace_name)
|
|
58
|
+
_trace.metadata = make_json_serializable(_trace_metadata)
|
|
59
|
+
current_trace_context.set(_trace)
|
|
60
|
+
|
|
61
|
+
trace_manager.add_span( # adds a dummy root span
|
|
62
|
+
BaseSpan(
|
|
63
|
+
uuid=_trace_uuid,
|
|
64
|
+
trace_uuid=_trace_uuid,
|
|
65
|
+
parent_uuid=None,
|
|
66
|
+
start_time=perf_counter(),
|
|
67
|
+
name=_trace_name,
|
|
68
|
+
status=TraceSpanStatus.IN_PROGRESS,
|
|
69
|
+
children=[],
|
|
70
|
+
)
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
def on_trace_end(self, trace: "Trace") -> None:
|
|
74
|
+
trace_dict = trace.export()
|
|
75
|
+
_trace_uuid = trace_dict.get("id")
|
|
76
|
+
_trace_name = trace_dict.get("workflow_name")
|
|
77
|
+
|
|
78
|
+
trace_manager.remove_span(_trace_uuid) # removing the dummy root span
|
|
79
|
+
trace_manager.end_trace(_trace_uuid)
|
|
80
|
+
current_trace_context.set(None)
|
|
81
|
+
|
|
82
|
+
def on_span_start(self, span: "Span") -> None:
|
|
83
|
+
if not span.started_at:
|
|
84
|
+
return
|
|
85
|
+
current_span = current_span_context.get()
|
|
86
|
+
if current_span and isinstance(current_span, LlmSpan):
|
|
87
|
+
return
|
|
88
|
+
|
|
89
|
+
span_type = self.get_span_kind(span.span_data)
|
|
90
|
+
observer = Observer(span_type=span_type, func_name="NA")
|
|
91
|
+
if span_type == "llm":
|
|
92
|
+
observer.observe_kwargs["model"] = "temporary model"
|
|
93
|
+
observer.update_span_properties = (
|
|
94
|
+
lambda span_type: update_span_properties(span_type, span.span_data)
|
|
95
|
+
)
|
|
96
|
+
self.span_observers[span.span_id] = observer
|
|
97
|
+
observer.__enter__()
|
|
98
|
+
|
|
99
|
+
def on_span_end(self, span: "Span") -> None:
|
|
100
|
+
update_trace_properties_from_span_data(
|
|
101
|
+
current_trace_context.get(), span.span_data
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
current_span = current_span_context.get()
|
|
105
|
+
if current_span and isinstance(current_span, LlmSpan):
|
|
106
|
+
update_span_properties(current_span, span.span_data)
|
|
107
|
+
return
|
|
108
|
+
observer = self.span_observers.pop(span.span_id, None)
|
|
109
|
+
if observer:
|
|
110
|
+
observer.__exit__(None, None, None)
|
|
111
|
+
|
|
112
|
+
def force_flush(self) -> None:
|
|
113
|
+
pass
|
|
114
|
+
|
|
115
|
+
def shutdown(self) -> None:
|
|
116
|
+
pass
|
|
117
|
+
|
|
118
|
+
def get_span_kind(self, span_data: "SpanData") -> str:
|
|
119
|
+
if isinstance(span_data, AgentSpanData):
|
|
120
|
+
return "agent"
|
|
121
|
+
if isinstance(span_data, FunctionSpanData):
|
|
122
|
+
return "tool"
|
|
123
|
+
if isinstance(span_data, MCPListToolsSpanData):
|
|
124
|
+
return "tool"
|
|
125
|
+
if isinstance(span_data, GenerationSpanData):
|
|
126
|
+
return "llm"
|
|
127
|
+
if isinstance(span_data, ResponseSpanData):
|
|
128
|
+
return "llm"
|
|
129
|
+
if isinstance(span_data, HandoffSpanData):
|
|
130
|
+
return "custom"
|
|
131
|
+
if isinstance(span_data, CustomSpanData):
|
|
132
|
+
return "base"
|
|
133
|
+
if isinstance(span_data, GuardrailSpanData):
|
|
134
|
+
return "base"
|
|
135
|
+
return "base"
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
from deepeval.tracing.types import Trace
|
|
1
2
|
from openai.types.responses.response_input_item_param import (
|
|
2
3
|
FunctionCallOutput,
|
|
3
4
|
Message,
|
|
4
5
|
)
|
|
5
6
|
from openai.types.responses.response_output_message_param import Content
|
|
6
|
-
from typing import Union, List
|
|
7
|
+
from typing import Union, List, Optional
|
|
7
8
|
from openai.types.responses import (
|
|
8
9
|
ResponseFunctionToolCallParam,
|
|
9
10
|
ResponseOutputMessageParam,
|
|
@@ -25,6 +26,8 @@ from deepeval.tracing.types import (
|
|
|
25
26
|
)
|
|
26
27
|
import json
|
|
27
28
|
|
|
29
|
+
from deepeval.tracing.utils import make_json_serializable
|
|
30
|
+
|
|
28
31
|
try:
|
|
29
32
|
from agents import MCPListToolsSpanData
|
|
30
33
|
from agents.tracing.span_data import (
|
|
@@ -89,13 +92,17 @@ def update_span_properties_from_response_span_data(
|
|
|
89
92
|
return
|
|
90
93
|
# Extract usage tokens
|
|
91
94
|
usage = response.usage
|
|
95
|
+
cached_input_tokens = None
|
|
96
|
+
ouptut_reasoning_tokens = None
|
|
92
97
|
if usage:
|
|
93
98
|
output_tokens = usage.output_tokens
|
|
94
99
|
input_tokens = usage.input_tokens
|
|
95
100
|
cached_input_tokens = usage.input_tokens_details.cached_tokens
|
|
96
101
|
ouptut_reasoning_tokens = usage.output_tokens_details.reasoning_tokens
|
|
97
102
|
# Get input and output
|
|
98
|
-
input = parse_response_input(
|
|
103
|
+
input = parse_response_input(
|
|
104
|
+
span_data.input, span_data.response.instructions
|
|
105
|
+
)
|
|
99
106
|
raw_output = parse_response_output(response.output)
|
|
100
107
|
output = (
|
|
101
108
|
raw_output if isinstance(raw_output, str) else json.dumps(raw_output)
|
|
@@ -112,6 +119,23 @@ def update_span_properties_from_response_span_data(
|
|
|
112
119
|
span.input = input
|
|
113
120
|
span.output = output
|
|
114
121
|
span.name = "LLM Generation"
|
|
122
|
+
response_dict = response.model_dump(exclude_none=True, mode="json")
|
|
123
|
+
span.metadata["invocation_params"] = {
|
|
124
|
+
k: v
|
|
125
|
+
for k, v in response_dict.items()
|
|
126
|
+
if k
|
|
127
|
+
in (
|
|
128
|
+
"max_output_tokens",
|
|
129
|
+
"parallel_tool_calls",
|
|
130
|
+
"reasoning",
|
|
131
|
+
"temperature",
|
|
132
|
+
"text",
|
|
133
|
+
"tool_choice",
|
|
134
|
+
"tools",
|
|
135
|
+
"top_p",
|
|
136
|
+
"truncation",
|
|
137
|
+
)
|
|
138
|
+
}
|
|
115
139
|
|
|
116
140
|
|
|
117
141
|
def update_span_properties_from_generation_span_data(
|
|
@@ -136,6 +160,11 @@ def update_span_properties_from_generation_span_data(
|
|
|
136
160
|
span.input = input
|
|
137
161
|
span.output = output
|
|
138
162
|
span.name = "LLM Generation"
|
|
163
|
+
span.metadata["invocation_params"] = {
|
|
164
|
+
"model_config": make_json_serializable(
|
|
165
|
+
generation_span_data.model_config
|
|
166
|
+
),
|
|
167
|
+
}
|
|
139
168
|
|
|
140
169
|
|
|
141
170
|
########################################################
|
|
@@ -191,8 +220,6 @@ def update_span_properties_from_agent_span_data(
|
|
|
191
220
|
if agent_span_data.output_type:
|
|
192
221
|
metadata["output_type"] = agent_span_data.output_type
|
|
193
222
|
span.metadata = metadata
|
|
194
|
-
span.input = None
|
|
195
|
-
span.output = None
|
|
196
223
|
|
|
197
224
|
|
|
198
225
|
########################################################
|
|
@@ -238,10 +265,30 @@ def update_span_properties_from_guardrail_span_data(
|
|
|
238
265
|
########################################################
|
|
239
266
|
|
|
240
267
|
|
|
241
|
-
def parse_response_input(
|
|
242
|
-
|
|
243
|
-
|
|
268
|
+
def parse_response_input(
|
|
269
|
+
input: Union[str, List[ResponseInputItemParam]],
|
|
270
|
+
instructions: Optional[Union[str, List[ResponseInputItemParam]]] = None,
|
|
271
|
+
):
|
|
272
|
+
|
|
244
273
|
processed_input = []
|
|
274
|
+
|
|
275
|
+
if isinstance(input, str) and isinstance(instructions, str):
|
|
276
|
+
return [
|
|
277
|
+
{"type": "message", "role": "system", "content": instructions},
|
|
278
|
+
{"type": "message", "role": "user", "content": input},
|
|
279
|
+
]
|
|
280
|
+
elif isinstance(input, list) and isinstance(instructions, list):
|
|
281
|
+
input = instructions + input
|
|
282
|
+
elif isinstance(input, list) and isinstance(instructions, str):
|
|
283
|
+
processed_input += [
|
|
284
|
+
{"type": "message", "role": "system", "content": instructions}
|
|
285
|
+
]
|
|
286
|
+
elif isinstance(input, str) and isinstance(instructions, list):
|
|
287
|
+
processed_input += [
|
|
288
|
+
{"type": "message", "role": "user", "content": input}
|
|
289
|
+
]
|
|
290
|
+
input = instructions
|
|
291
|
+
|
|
245
292
|
for item in input:
|
|
246
293
|
if "type" not in item:
|
|
247
294
|
if "role" in item and "content" in item:
|
|
@@ -365,3 +412,32 @@ def parse_function_call(
|
|
|
365
412
|
"name": function_call.name,
|
|
366
413
|
"arguments": function_call.arguments,
|
|
367
414
|
}
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def update_trace_properties_from_span_data(
|
|
418
|
+
trace: Trace,
|
|
419
|
+
span_data: Union["ResponseSpanData", "GenerationSpanData"],
|
|
420
|
+
):
|
|
421
|
+
if isinstance(span_data, ResponseSpanData):
|
|
422
|
+
if not trace.input:
|
|
423
|
+
trace.input = parse_response_input(
|
|
424
|
+
span_data.input, span_data.response.instructions
|
|
425
|
+
)
|
|
426
|
+
raw_output = parse_response_output(span_data.response.output)
|
|
427
|
+
output = (
|
|
428
|
+
raw_output
|
|
429
|
+
if isinstance(raw_output, str)
|
|
430
|
+
else json.dumps(raw_output)
|
|
431
|
+
)
|
|
432
|
+
trace.output = output
|
|
433
|
+
|
|
434
|
+
elif isinstance(span_data, GenerationSpanData):
|
|
435
|
+
if not trace.input:
|
|
436
|
+
trace.input = span_data.input
|
|
437
|
+
raw_output = span_data.output
|
|
438
|
+
output = (
|
|
439
|
+
raw_output
|
|
440
|
+
if isinstance(raw_output, str)
|
|
441
|
+
else json.dumps(raw_output)
|
|
442
|
+
)
|
|
443
|
+
trace.output = output
|