deepeval 3.4.6.tar.gz → 3.4.7.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.4.6 → deepeval-3.4.7}/PKG-INFO +26 -1
- {deepeval-3.4.6 → deepeval-3.4.7}/README.md +24 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/__init__.py +7 -2
- deepeval-3.4.7/deepeval/_version.py +1 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/main.py +1 -1
- deepeval-3.4.7/deepeval/env.py +35 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/utils.py +7 -1
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/langchain/__init__.py +1 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/langchain/callback.py +4 -0
- deepeval-3.4.7/deepeval/integrations/langchain/patch.py +32 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/pydantic_ai/agent.py +91 -1
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/pydantic_ai/setup.py +0 -1
- deepeval-3.4.7/deepeval/openai_agents/__init__.py +6 -0
- deepeval-3.4.7/deepeval/openai_agents/agent.py +184 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai_agents/callback_handler.py +28 -21
- deepeval-3.4.7/deepeval/openai_agents/patch.py +115 -0
- deepeval-3.4.7/deepeval/openai_agents/runner.py +114 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/telemetry.py +2 -2
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/api.py +11 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/context.py +5 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/tracing.py +20 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/types.py +2 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/pyproject.toml +3 -2
- deepeval-3.4.6/deepeval/_version.py +0 -1
- deepeval-3.4.6/deepeval/openai_agents/__init__.py +0 -4
- {deepeval-3.4.6 → deepeval-3.4.7}/LICENSE.md +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/annotation/annotation.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/annotation/api.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/drop/drop.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/server.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/test.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/types.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/cli/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/confident/api.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/confident/types.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/constants.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/api.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/dataset.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/golden.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/types.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/errors.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/configs.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/evaluate.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/execute.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/crewai/agent.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/crewai/patch.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/langchain/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/llama_index/agent/patched.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/key_handler.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/answer_relevancy/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/argument_correctness/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/base_metric.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/bias/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_precision/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_recall/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_recall/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/contextual_relevancy/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversation_completeness/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/conversational_g_eval/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/templates.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/faithfulness/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/faithfulness/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/g_eval/g_eval.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/g_eval/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/hallucination/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/indicator.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/misuse/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/non_advice/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/pii_leakage/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/prompt_alignment/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/ragas.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_adherence/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/role_violation/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/task_completion/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/tool_correctness/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/toxicity/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/turn_relevancy/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/metrics/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/base_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/embedding_models/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/anthropic_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/azure_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/deepseek_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/gemini_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/grok_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/kimi_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/litellm_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/local_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/ollama_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/openai_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/llms/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/mlllms/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/mlllms/gemini_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/mlllms/ollama_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/mlllms/openai_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/models/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai/patch.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai_agents/extractors.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/plugins/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/progress_context.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/prompt/api.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/prompt/prompt.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/prompt/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/py.typed +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/scorer/scorer.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/simulator/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/singleton.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/chunking/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/synthesizer.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/conversational_test_case.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/llm_test_case.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/mllm_test_case.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_case/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/api.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/cache.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/test_run/test_run.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/otel/exporter.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/otel/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/patchers.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/tracing/utils.py +0 -0
- {deepeval-3.4.6 → deepeval-3.4.7}/deepeval/utils.py +0 -0
{deepeval-3.4.6 → deepeval-3.4.7}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deepeval
-Version: 3.4.6
+Version: 3.4.7
 Summary: The LLM Evaluation Framework
 Home-page: https://github.com/confident-ai/deepeval
 License: Apache-2.0
@@ -31,6 +31,7 @@ Requires-Dist: pytest-asyncio
 Requires-Dist: pytest-repeat
 Requires-Dist: pytest-rerunfailures (>=12.0,<13.0)
 Requires-Dist: pytest-xdist
+Requires-Dist: python-dotenv (>=1.1.1,<2.0.0)
 Requires-Dist: requests (>=2.31.0,<3.0.0)
 Requires-Dist: rich (>=13.6.0,<15.0.0)
 Requires-Dist: sentry-sdk
@@ -186,6 +187,16 @@ Let's pretend your LLM application is a RAG based customer support chatbot; here
 ```
 pip install -U deepeval
 ```
+### Environment variables (.env / .env.local)
+
+DeepEval auto-loads `.env.local` then `.env` from the current working directory **at import time**.
+**Precedence:** process env -> `.env.local` -> `.env`.
+Opt out with `DEEPEVAL_DISABLE_DOTENV=1`.
+
+```bash
+cp .env.example .env.local
+# then edit .env.local (ignored by git)
+```

 ## Create an account (highly recommended)

@@ -411,6 +422,20 @@ You should see a link displayed in the CLI once the test has finished running. P

 <br />

+## Configuration
+
+### Environment variables via .env files
+
+Using `.env.local` or `.env` is optional. If they are missing, DeepEval uses your existing environment variables. When present, dotenv environment variables are auto-loaded at import time (unless you set `DEEPEVAL_DISABLE_DOTENV=1`).
+
+**Precedence:** process env -> `.env.local` -> `.env`
+
+```bash
+cp .env.example .env.local
+# then edit .env.local (ignored by git)
+
+<br />
+
 # Contributing

 Please read [CONTRIBUTING.md](https://github.com/confident-ai/deepeval/blob/main/CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us.

{deepeval-3.4.6 → deepeval-3.4.7}/README.md

@@ -140,6 +140,16 @@ Let's pretend your LLM application is a RAG based customer support chatbot; here
 ```
 pip install -U deepeval
 ```
+### Environment variables (.env / .env.local)
+
+DeepEval auto-loads `.env.local` then `.env` from the current working directory **at import time**.
+**Precedence:** process env -> `.env.local` -> `.env`.
+Opt out with `DEEPEVAL_DISABLE_DOTENV=1`.
+
+```bash
+cp .env.example .env.local
+# then edit .env.local (ignored by git)
+```

 ## Create an account (highly recommended)

@@ -365,6 +375,20 @@ You should see a link displayed in the CLI once the test has finished running. P

 <br />

+## Configuration
+
+### Environment variables via .env files
+
+Using `.env.local` or `.env` is optional. If they are missing, DeepEval uses your existing environment variables. When present, dotenv environment variables are auto-loaded at import time (unless you set `DEEPEVAL_DISABLE_DOTENV=1`).
+
+**Precedence:** process env -> `.env.local` -> `.env`
+
+```bash
+cp .env.example .env.local
+# then edit .env.local (ignored by git)
+
+<br />
+
 # Contributing

 Please read [CONTRIBUTING.md](https://github.com/confident-ai/deepeval/blob/main/CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us.

{deepeval-3.4.6 → deepeval-3.4.7}/deepeval/__init__.py

@@ -2,6 +2,11 @@ import os
 import warnings
 import re

+# load environment variables before other imports
+from .env import autoload_dotenv as _autoload_dotenv
+
+_autoload_dotenv()
+
 # Optionally add telemetry
 from ._version import __version__

@@ -11,7 +16,7 @@ from deepeval.test_run import on_test_run_end, log_hyperparameters
 from deepeval.utils import login
 from deepeval.telemetry import *

-if os.getenv("DEEPEVAL_GRPC_LOGGING") != "
+if os.getenv("DEEPEVAL_GRPC_LOGGING") != "1":
     os.environ["GRPC_VERBOSITY"] = "ERROR"
     os.environ["GRPC_TRACE"] = ""

@@ -61,7 +66,7 @@ def check_for_update():


 def update_warning_opt_in():
-    return os.getenv("DEEPEVAL_UPDATE_WARNING_OPT_IN") == "
+    return os.getenv("DEEPEVAL_UPDATE_WARNING_OPT_IN") == "1"


 def is_read_only_env():

deepeval-3.4.7/deepeval/_version.py

@@ -0,0 +1 @@
+__version__: str = "3.4.7"

deepeval-3.4.7/deepeval/env.py

@@ -0,0 +1,35 @@
+from __future__ import annotations
+import os
+
+try:
+    from dotenv import load_dotenv, find_dotenv  # type: ignore
+except Exception:
+    load_dotenv = None
+    find_dotenv = None
+
+
+def autoload_dotenv() -> None:
+    """
+    Autoload environment variables for DeepEval at import time.
+
+    Precedence from highest -> lowest:
+      1) Existing process environment variables
+      2) .env.local (from current working directory)
+      3) .env (from current working directory)
+
+    Behavior:
+    - Loads .env.local then .env if present, without overriding existing vars.
+    - Opt-out by setting DEEPEVAL_DISABLE_DOTENV=1.
+    - Soft-fails cleanly if python-dotenv is not installed.
+    """
+    if os.getenv("DEEPEVAL_DISABLE_DOTENV") == "1":
+        return
+
+    if not (load_dotenv and find_dotenv):
+        return
+
+    for name in (".env.local", ".env"):
+        path = find_dotenv(name, usecwd=True)
+        if path:
+            # Don't override previously set values
+            load_dotenv(path, override=False)

{deepeval-3.4.6 → deepeval-3.4.7}/deepeval/evaluate/utils.py

@@ -24,8 +24,9 @@ from deepeval.test_run import (
     MetricData,
 )
 from deepeval.evaluate.types import TestResult
-from deepeval.tracing.api import TraceApi, BaseApiSpan
+from deepeval.tracing.api import TraceApi, BaseApiSpan, TraceSpanApiStatus
 from deepeval.tracing.tracing import BaseSpan, Trace
+from deepeval.tracing.types import TraceSpanStatus
 from deepeval.constants import PYTEST_RUN_TEST_NAME
 from deepeval.tracing.utils import (
     perf_counter_to_datetime,
@@ -247,6 +248,11 @@ def create_api_trace(trace: Trace, golden: Golden) -> TraceApi:
         tools_called=trace.tools_called,
         expected_tools=trace.expected_tools,
         metadata=golden.additional_metadata,
+        status=(
+            TraceSpanApiStatus.SUCCESS
+            if trace.status == TraceSpanStatus.SUCCESS
+            else TraceSpanApiStatus.ERRORED
+        ),
     )


{deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/langchain/callback.py

@@ -248,6 +248,8 @@ class CallbackHandler(BaseCallbackHandler):
             model=model,
             # fallback for on_end callback
             end_time=perf_counter(),
+            metric_collection=metadata.get("metric_collection", None),
+            metrics=metadata.get("metrics", None),
         )

         self.add_span_to_trace(llm_span)
@@ -348,6 +350,8 @@ class CallbackHandler(BaseCallbackHandler):
             ),
             # fallback for on_end callback
             end_time=perf_counter(),
+            metric_collection=metadata.get("metric_collection", None),
+            metrics=metadata.get("metrics", None),
         )
         self.add_span_to_trace(tool_span)

deepeval-3.4.7/deepeval/integrations/langchain/patch.py

@@ -0,0 +1,32 @@
+from langchain_core.tools import tool as original_tool, BaseTool
+from deepeval.metrics import BaseMetric
+from typing import List, Optional, Callable, Any
+from functools import wraps
+
+
+def tool(
+    *args,
+    metrics: Optional[List[BaseMetric]] = None,
+    metric_collection: Optional[str] = None,
+    **kwargs
+):
+    """
+    Patched version of langchain_core.tools.tool that prints inputs and outputs
+    """
+
+    # original_tool returns a decorator function, so we need to return a decorator
+    def decorator(func: Callable) -> BaseTool:
+
+        # Apply the original tool decorator to get the BaseTool
+        tool_instance = original_tool(*args, **kwargs)(func)
+
+        if isinstance(tool_instance, BaseTool):
+            if tool_instance.metadata is None:
+                tool_instance.metadata = {}
+
+            tool_instance.metadata["metric_collection"] = metric_collection
+            tool_instance.metadata["metrics"] = metrics
+
+        return tool_instance
+
+    return decorator

{deepeval-3.4.6 → deepeval-3.4.7}/deepeval/integrations/pydantic_ai/agent.py

@@ -1,6 +1,6 @@
 from deepeval.telemetry import capture_tracing_integration
 from deepeval.metrics import BaseMetric
-from typing import List
+from typing import List, Optional
 import functools
 import inspect
 import json
@@ -8,6 +8,7 @@ from deepeval.test_case import LLMTestCase
 from deepeval.tracing.types import TestCaseMetricPair
 from deepeval.tracing.tracing import trace_manager
 from deepeval.tracing.otel.utils import parse_string, parse_list_of_strings
+from opentelemetry import trace

 try:
     from opentelemetry.trace import NoOpTracer
@@ -63,6 +64,95 @@ class PydanticAIAgent(Agent):
         # Patch the run method only for this instance
         self._patch_run_method()
         self._patch_run_method_sync()
+        self._patch_tool_decorator()
+
+    def _patch_tool_decorator(self):
+        """Patch the tool decorator to print input and output"""
+        original_tool = self.tool
+
+        @functools.wraps(original_tool)
+        def patched_tool(
+            *args,
+            metric_collection: Optional[str] = None,
+            metrics: Optional[List[BaseMetric]] = None,
+            **kwargs
+        ):
+
+            # Check if function is in args (direct decoration: @agent.tool)
+            if args and callable(args[0]):
+                original_func = args[0]
+                patched_func = self._create_patched_function(
+                    original_func, metric_collection, metrics
+                )
+                new_args = (patched_func,) + args[1:]
+                result = original_tool(*new_args, **kwargs)
+                return result
+            else:
+                # Decorator called with parameters: @agent.tool(metric_collection="...")
+                # Return a decorator that will receive the function
+                def decorator_with_params(func):
+                    patched_func = self._create_patched_function(
+                        func, metric_collection, metrics
+                    )
+                    return original_tool(patched_func, **kwargs)
+
+                return decorator_with_params
+
+        # Replace the tool method for this instance
+        self.tool = patched_tool
+
+    def _create_patched_function(
+        self, original_func, metric_collection, metrics
+    ):
+        """Create a patched version of the function that adds tracing"""
+        if inspect.iscoroutinefunction(original_func):
+
+            @functools.wraps(original_func)
+            async def patched_async_func(*func_args, **func_kwargs):
+                result = await original_func(*func_args, **func_kwargs)
+
+                current_span = trace.get_current_span()
+                if current_span.is_recording():
+                    try:
+                        result_str = str(result)
+                    except Exception:
+                        result_str = ""
+                    current_span.set_attribute(
+                        "confident.span.output", result_str
+                    )
+                    if metric_collection:
+                        current_span.set_attribute(
+                            "confident.span.metric_collection",
+                            metric_collection,
+                        )
+                    # TODO: add metrics in component level evals
+                return result
+
+            return patched_async_func
+        else:
+
+            @functools.wraps(original_func)
+            def patched_sync_func(*func_args, **func_kwargs):
+                result = original_func(*func_args, **func_kwargs)
+
+                current_span = trace.get_current_span()
+                if current_span.is_recording():
+                    try:
+                        result_str = str(result)
+                    except Exception:
+                        result_str = ""
+                    current_span.set_attribute(
+                        "confident.span.output", result_str
+                    )
+                    if metric_collection:
+                        current_span.set_attribute(
+                            "confident.span.metric_collection",
+                            metric_collection,
+                        )
+                    # TODO: add metrics in component level evals
+                return result
+
+            return patched_sync_func

     def _patch_run_method(self):
         """Patch the Agent.run method only for this PydanticAIAgent instance"""

deepeval-3.4.7/deepeval/openai_agents/__init__.py

@@ -0,0 +1,6 @@
+from deepeval.openai_agents.callback_handler import DeepEvalTracingProcessor
+from deepeval.openai_agents.runner import Runner
+from deepeval.openai_agents.patch import function_tool
+from deepeval.openai_agents.agent import DeepEvalAgent as Agent
+
+__all__ = ["DeepEvalTracingProcessor", "Runner", "function_tool", "Agent"]

deepeval-3.4.7/deepeval/openai_agents/agent.py

@@ -0,0 +1,184 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field, replace
+from typing import Any, Optional, Awaitable, Callable
+
+from deepeval.tracing import observe
+from deepeval.prompt import Prompt
+
+try:
+    from agents.agent import Agent as BaseAgent
+    from agents.models.interface import Model, ModelProvider
+except Exception as e:
+    raise RuntimeError(
+        "openai-agents is required for this integration. Please install it."
+    ) from e
+
+
+class _ObservedModel(Model):
+    def __init__(
+        self,
+        inner: Model,
+        *,
+        metrics: Optional[list[Any]] = None,
+        metric_collection: Optional[str] = None,
+        deepeval_prompt: Optional[Any] = None,
+    ) -> None:
+        self._inner = inner
+        self._metrics = metrics
+        self._metric_collection = metric_collection
+        self._deepeval_prompt = deepeval_prompt
+
+    # Delegate attributes not overridden
+    def __getattr__(self, name: str) -> Any:
+        return getattr(self._inner, name)
+
+    def _get_model_name(self) -> str:
+        try:
+            for attr in ("model", "model_name", "name"):
+                if hasattr(self._inner, attr):
+                    val = getattr(self._inner, attr)
+                    if val is not None:
+                        return str(val)
+        except Exception:
+            pass
+        return "unknown"
+
+    async def get_response(
+        self,
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        tracing,
+        *,
+        previous_response_id,
+        conversation_id,
+        prompt,
+    ):
+        model_name = self._get_model_name()
+
+        wrapped = observe(
+            metrics=self._metrics,
+            metric_collection=self._metric_collection,
+            type="llm",
+            model=model_name,
+            prompt=self._deepeval_prompt,
+        )(self._inner.get_response)
+
+        return await wrapped(
+            system_instructions,
+            input,
+            model_settings,
+            tools,
+            output_schema,
+            handoffs,
+            tracing,
+            previous_response_id=previous_response_id,
+            conversation_id=conversation_id,
+            prompt=prompt,
+        )
+
+    def stream_response(
+        self,
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        tracing,
+        *,
+        previous_response_id,
+        conversation_id,
+        prompt,
+    ):
+        # Optional: if you also want to observe streaming, uncomment and wrap similarly.
+        # wrapped = observe(
+        #     metrics=self._metrics,
+        #     metric_collection=self._metric_collection,
+        #     type="llm",
+        #     model=model_name,
+        # )(self._inner.stream_response)
+        # return wrapped(
+        #     system_instructions,
+        #     input,
+        #     model_settings,
+        #     tools,
+        #     output_schema,
+        #     handoffs,
+        #     tracing,
+        #     previous_response_id=previous_response_id,
+        #     conversation_id=conversation_id,
+        #     prompt=prompt,
+        # )
+        return self._inner.stream_response(
+            system_instructions,
+            input,
+            model_settings,
+            tools,
+            output_schema,
+            handoffs,
+            tracing,
+            previous_response_id=previous_response_id,
+            conversation_id=conversation_id,
+            prompt=prompt,
+        )
+
+
+class _ObservedProvider(ModelProvider):
+    def __init__(
+        self,
+        base: ModelProvider,
+        *,
+        metrics: Optional[list[Any]] = None,
+        metric_collection: Optional[str] = None,
+        deepeval_prompt: Optional[Any] = None,
+    ) -> None:
+        self._base = base
+        self._metrics = metrics
+        self._metric_collection = metric_collection
+        self._deepeval_prompt = deepeval_prompt
+
+    def get_model(self, model_name: str | None) -> Model:
+        model = self._base.get_model(model_name)
+        return _ObservedModel(
+            model,
+            metrics=self._metrics,
+            metric_collection=self._metric_collection,
+            deepeval_prompt=self._deepeval_prompt,
+        )
+
+
+@dataclass
+class DeepEvalAgent(BaseAgent[Any]):
+    """
+    A subclass of agents.Agent that accepts `metrics` and `metric_collection`
+    and ensures the underlying model's `get_response` is wrapped with deepeval.observe.
+    """
+
+    metrics: list[Any] | None = field(default=None)
+    metric_collection: str | None = field(default=None)
+    deepeval_prompt: Prompt | None = field(default=None)
+
+    def __post_init__(self):
+        super().__post_init__()
+        # If a direct Model instance is set on the agent, wrap it here.
+        if self.model is not None and not isinstance(self.model, str):
+            try:
+                from agents.models.interface import (
+                    Model as _Model,
+                )  # local import for safety
+
+                if isinstance(self.model, _Model):
+                    self.model = _ObservedModel(
+                        self.model,
+                        metrics=self.metrics,
+                        metric_collection=self.metric_collection,
+                        deepeval_prompt=self.deepeval_prompt,
+                    )
+            except Exception:
+                # If we can't import or wrap, silently skip.
+                pass

{deepeval-3.4.6 → deepeval-3.4.7}/deepeval/openai_agents/callback_handler.py

@@ -1,9 +1,9 @@
 from deepeval.tracing.tracing import (
     Observer,
-
-    current_trace_context,
+    current_span_context,
 )
 from deepeval.openai_agents.extractors import *
+from deepeval.tracing.context import current_trace_context

 try:
     from agents.tracing import Span, Trace, TracingProcessor
@@ -37,34 +37,41 @@ class DeepEvalTracingProcessor(TracingProcessor):
         self.span_observers: dict[str, Observer] = {}

     def on_trace_start(self, trace: "Trace") -> None:
-
-        self.root_span_observers[trace.trace_id] = observer
-        observer.__enter__()
+        pass

     def on_trace_end(self, trace: "Trace") -> None:
-
-        current_trace = current_trace_context.get()
-        thread_id = getattr(trace, "group_id", None)
-        current_trace.thread_id = thread_id
-
-        observer = self.root_span_observers.pop(trace.trace_id, None)
-        if observer:
-            observer.__exit__(None, None, None)
+        pass

     def on_span_start(self, span: "Span") -> None:
        if not span.started_at:
            return
        span_type = self.get_span_kind(span.span_data)
-
-
-
-
-
-
-
-
+        if span_type == "agent":
+            if isinstance(span.span_data, AgentSpanData):
+                current_trace = current_trace_context.get()
+                if current_trace:
+                    current_trace.name = span.span_data.name
+
+        if span_type == "tool":
+            return
+        elif span_type == "llm":
+            return
+        else:
+            observer = Observer(span_type=span_type, func_name="NA")
+            observer.update_span_properties = (
+                lambda base_span: update_span_properties(
+                    base_span, span.span_data
+                )
+            )
+            self.span_observers[span.span_id] = observer
+            observer.__enter__()

     def on_span_end(self, span: "Span") -> None:
+        span_type = self.get_span_kind(span.span_data)
+        if span_type == "llm":
+            current_span = current_span_context.get()
+            if current_span:
+                update_span_properties(current_span, span.span_data)
         observer = self.span_observers.pop(span.span_id, None)
         if observer:
             observer.__exit__(None, None, None)
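For reference, a minimal sketch of how the dotenv autoload added in `deepeval/env.py` behaves from a user's perspective. This is illustrative only, assuming python-dotenv is installed and a `.env.local` file exists in the working directory; the `OPENAI_API_KEY` values are made-up examples, not part of the release.

```python
# Illustrative sketch of the 3.4.7 dotenv autoload behavior.
import os

# A value already present in the process environment is never overridden,
# because autoload_dotenv() calls load_dotenv(path, override=False).
os.environ["OPENAI_API_KEY"] = "key-from-process-env"

import deepeval  # importing deepeval runs autoload_dotenv() at import time

print(os.environ["OPENAI_API_KEY"])  # still "key-from-process-env", not the .env.local value

# To skip .env loading entirely, set DEEPEVAL_DISABLE_DOTENV=1 before importing deepeval.
```

Similarly, a hedged sketch of the new `deepeval.openai_agents` exports, where `DeepEvalAgent` (re-exported as `Agent`) adds `metrics`, `metric_collection`, and `deepeval_prompt` fields on top of the base `agents.Agent`; the agent name, instructions, and metric choice below are assumptions for illustration.

```python
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.openai_agents import Agent  # DeepEvalAgent re-exported as Agent

# Extra dataclass fields from 3.4.7: metrics / metric_collection / deepeval_prompt.
# When the agent's model resolves to a Model instance, its get_response call is
# wrapped with deepeval's observe(type="llm", ...) so it is traced as an LLM span.
agent = Agent(
    name="support-assistant",          # assumed example name
    instructions="Answer politely.",   # assumed example instructions
    metrics=[AnswerRelevancyMetric()],
)
```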