deepeval 3.5.5__tar.gz → 3.5.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.5.5 → deepeval-3.5.7}/PKG-INFO +1 -1
- deepeval-3.5.7/deepeval/_version.py +1 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/cli/main.py +182 -18
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/dataset/dataset.py +39 -2
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/evaluate/execute.py +10 -6
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +34 -19
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/openai_agents/agent.py +3 -4
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/openai_agents/callback_handler.py +44 -3
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/openai_agents/runner.py +24 -20
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/scorer/scorer.py +2 -2
- {deepeval-3.5.5 → deepeval-3.5.7}/pyproject.toml +1 -1
- deepeval-3.5.5/deepeval/_version.py +0 -1
- {deepeval-3.5.5 → deepeval-3.5.7}/LICENSE.md +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/README.md +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/annotation/annotation.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/annotation/api.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/drop/drop.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/cli/dotenv_handler.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/cli/server.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/cli/test.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/cli/types.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/cli/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/confident/api.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/confident/types.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/config/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/config/settings.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/config/settings_manager.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/config/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/constants.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/dataset/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/dataset/api.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/dataset/golden.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/dataset/test_run_tracer.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/dataset/types.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/errors.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/evaluate/configs.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/evaluate/evaluate.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/evaluate/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/crewai/agent.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/crewai/patch.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/langchain/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/langchain/callback.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/langchain/patch.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/langchain/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/llama_index/agent/patched.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/pydantic_ai/agent.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/pydantic_ai/otel.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/pydantic_ai/patcher.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/integrations/pydantic_ai/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/key_handler.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/answer_relevancy/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/argument_correctness/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/base_metric.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/bias/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_precision/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_recall/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_recall/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/contextual_relevancy/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversation_completeness/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversational_dag/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversational_dag/nodes.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversational_dag/templates.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/conversational_g_eval/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/dag/templates.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/faithfulness/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/faithfulness/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/g_eval/g_eval.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/g_eval/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/hallucination/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/indicator.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/misuse/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/non_advice/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/pii_leakage/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/prompt_alignment/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/ragas.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/role_adherence/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/role_violation/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/task_completion/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/tool_correctness/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/toxicity/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/turn_relevancy/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/metrics/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/base_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/embedding_models/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/anthropic_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/azure_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/deepseek_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/gemini_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/grok_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/kimi_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/litellm_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/local_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/ollama_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/openai_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/llms/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/mlllms/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/mlllms/gemini_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/mlllms/ollama_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/mlllms/openai_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/retry_policy.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/models/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/openai/patch.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/openai/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/openai_agents/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/openai_agents/extractors.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/openai_agents/patch.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/plugins/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/progress_context.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/prompt/api.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/prompt/prompt.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/prompt/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/py.typed +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/simulator/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/singleton.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/chunking/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/synthesizer.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/telemetry.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_case/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_case/conversational_test_case.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_case/llm_test_case.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_case/mllm_test_case.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_case/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_run/api.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_run/cache.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/test_run/test_run.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/api.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/context.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/otel/exporter.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/otel/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/patchers.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/tracing.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/types.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/tracing/utils.py +0 -0
- {deepeval-3.5.5 → deepeval-3.5.7}/deepeval/utils.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__: str = "3.5.7"
|
|
@@ -28,8 +28,6 @@ import typer
|
|
|
28
28
|
from enum import Enum
|
|
29
29
|
from pydantic import SecretStr
|
|
30
30
|
from deepeval.key_handler import (
|
|
31
|
-
KEY_FILE_HANDLER,
|
|
32
|
-
KeyValues,
|
|
33
31
|
EmbeddingKeyValues,
|
|
34
32
|
ModelKeyValues,
|
|
35
33
|
)
|
|
@@ -46,16 +44,9 @@ from deepeval.cli.utils import (
|
|
|
46
44
|
render_login_message,
|
|
47
45
|
upload_and_open_link,
|
|
48
46
|
PROD,
|
|
49
|
-
resolve_save_target,
|
|
50
|
-
save_environ_to_store,
|
|
51
|
-
unset_environ_in_store,
|
|
52
|
-
switch_model_provider,
|
|
53
47
|
)
|
|
54
48
|
from deepeval.confident.api import (
|
|
55
|
-
get_confident_api_key,
|
|
56
49
|
is_confident,
|
|
57
|
-
set_confident_api_key,
|
|
58
|
-
CONFIDENT_API_KEY_ENV_VAR,
|
|
59
50
|
)
|
|
60
51
|
|
|
61
52
|
app = typer.Typer(name="deepeval")
|
|
@@ -109,7 +100,7 @@ def set_confident_region_command(
|
|
|
109
100
|
# Add flag emojis based on region
|
|
110
101
|
flag = "🇺🇸" if region == Regions.US else "🇪🇺"
|
|
111
102
|
|
|
112
|
-
|
|
103
|
+
settings = get_settings()
|
|
113
104
|
with settings.edit(save=save) as edit_ctx:
|
|
114
105
|
settings.CONFIDENT_REGION = region.value
|
|
115
106
|
|
|
@@ -282,23 +273,196 @@ def view():
|
|
|
282
273
|
upload_and_open_link(_span=span)
|
|
283
274
|
|
|
284
275
|
|
|
285
|
-
@app.command(name="
|
|
286
|
-
def
|
|
276
|
+
@app.command(name="set-debug")
|
|
277
|
+
def set_debug(
|
|
278
|
+
# Core verbosity
|
|
279
|
+
log_level: Optional[str] = typer.Option(
|
|
280
|
+
None,
|
|
281
|
+
"--log-level",
|
|
282
|
+
help="Global LOG_LEVEL (DEBUG|INFO|WARNING|ERROR|CRITICAL|NOTSET).",
|
|
283
|
+
),
|
|
284
|
+
verbose: Optional[bool] = typer.Option(
|
|
285
|
+
None, "--verbose/--no-verbose", help="Toggle DEEPEVAL_VERBOSE_MODE."
|
|
286
|
+
),
|
|
287
|
+
# Retry logging dials
|
|
288
|
+
retry_before_level: Optional[str] = typer.Option(
|
|
289
|
+
None,
|
|
290
|
+
"--retry-before-level",
|
|
291
|
+
help="Log level before a retry attempt (DEBUG|INFO|WARNING|ERROR|CRITICAL|NOTSET or numeric).",
|
|
292
|
+
),
|
|
293
|
+
retry_after_level: Optional[str] = typer.Option(
|
|
294
|
+
None,
|
|
295
|
+
"--retry-after-level",
|
|
296
|
+
help="Log level after a retry attempt (DEBUG|INFO|WARNING|ERROR|CRITICAL|NOTSET or numeric).",
|
|
297
|
+
),
|
|
298
|
+
# gRPC visibility
|
|
299
|
+
grpc: Optional[bool] = typer.Option(
|
|
300
|
+
None, "--grpc/--no-grpc", help="Toggle DEEPEVAL_GRPC_LOGGING."
|
|
301
|
+
),
|
|
302
|
+
grpc_verbosity: Optional[str] = typer.Option(
|
|
303
|
+
None,
|
|
304
|
+
"--grpc-verbosity",
|
|
305
|
+
help="Set GRPC_VERBOSITY (DEBUG|INFO|ERROR|NONE).",
|
|
306
|
+
),
|
|
307
|
+
grpc_trace: Optional[str] = typer.Option(
|
|
308
|
+
None,
|
|
309
|
+
"--grpc-trace",
|
|
310
|
+
help=(
|
|
311
|
+
"Set GRPC_TRACE to comma-separated tracer names or glob patterns "
|
|
312
|
+
"(e.g. 'tcp,http,secure_endpoint', '*' for all, 'list_tracers' to print available)."
|
|
313
|
+
),
|
|
314
|
+
),
|
|
315
|
+
# Confident tracing
|
|
316
|
+
trace_verbose: Optional[bool] = typer.Option(
|
|
317
|
+
None,
|
|
318
|
+
"--trace-verbose/--no-trace-verbose",
|
|
319
|
+
help="Enable / disable CONFIDENT_TRACE_VERBOSE.",
|
|
320
|
+
),
|
|
321
|
+
trace_env: Optional[str] = typer.Option(
|
|
322
|
+
None,
|
|
323
|
+
"--trace-env",
|
|
324
|
+
help='Set CONFIDENT_TRACE_ENVIRONMENT ("development", "staging", "production", etc).',
|
|
325
|
+
),
|
|
326
|
+
trace_flush: Optional[bool] = typer.Option(
|
|
327
|
+
None,
|
|
328
|
+
"--trace-flush/--no-trace-flush",
|
|
329
|
+
help="Enable / disable CONFIDENT_TRACE_FLUSH.",
|
|
330
|
+
),
|
|
331
|
+
# Advanced / potentially surprising
|
|
332
|
+
error_reporting: Optional[bool] = typer.Option(
|
|
333
|
+
None,
|
|
334
|
+
"--error-reporting/--no-error-reporting",
|
|
335
|
+
help="Enable / disable ERROR_REPORTING.",
|
|
336
|
+
),
|
|
337
|
+
ignore_errors: Optional[bool] = typer.Option(
|
|
338
|
+
None,
|
|
339
|
+
"--ignore-errors/--no-ignore-errors",
|
|
340
|
+
help="Enable / disable IGNORE_DEEPEVAL_ERRORS (not recommended in normal debugging).",
|
|
341
|
+
),
|
|
342
|
+
# Persistence
|
|
343
|
+
save: Optional[str] = typer.Option(
|
|
344
|
+
None,
|
|
345
|
+
"--save",
|
|
346
|
+
help="Persist CLI parameters as environment variables in a dotenv file. "
|
|
347
|
+
"Usage: --save=dotenv[:path] (default: .env.local)",
|
|
348
|
+
),
|
|
349
|
+
):
|
|
287
350
|
"""
|
|
288
|
-
|
|
289
|
-
|
|
351
|
+
Configure verbose debug behavior for DeepEval.
|
|
352
|
+
|
|
353
|
+
This command lets you mix-and-match verbosity flags (global LOG_LEVEL, verbose mode),
|
|
354
|
+
retry logger levels, gRPC wire logging, and Confident trace toggles. Values apply
|
|
355
|
+
immediately to the current process and can be persisted to a dotenv file with --save.
|
|
356
|
+
|
|
357
|
+
Examples:
|
|
358
|
+
deepeval set-debug --log-level DEBUG --verbose --grpc --retry-before-level DEBUG --retry-after-level INFO
|
|
359
|
+
deepeval set-debug --trace-verbose --trace-env staging --save dotenv:.env.local
|
|
290
360
|
"""
|
|
291
361
|
settings = get_settings()
|
|
292
362
|
with settings.edit(save=save) as edit_ctx:
|
|
293
|
-
|
|
363
|
+
# Core verbosity
|
|
364
|
+
if log_level is not None:
|
|
365
|
+
settings.LOG_LEVEL = log_level
|
|
366
|
+
if verbose is not None:
|
|
367
|
+
settings.DEEPEVAL_VERBOSE_MODE = verbose
|
|
368
|
+
|
|
369
|
+
# Retry logging
|
|
370
|
+
if retry_before_level is not None:
|
|
371
|
+
settings.DEEPEVAL_RETRY_BEFORE_LOG_LEVEL = retry_before_level
|
|
372
|
+
if retry_after_level is not None:
|
|
373
|
+
settings.DEEPEVAL_RETRY_AFTER_LOG_LEVEL = retry_after_level
|
|
374
|
+
|
|
375
|
+
# gRPC
|
|
376
|
+
if grpc is not None:
|
|
377
|
+
settings.DEEPEVAL_GRPC_LOGGING = grpc
|
|
378
|
+
if grpc_verbosity is not None:
|
|
379
|
+
settings.GRPC_VERBOSITY = grpc_verbosity
|
|
380
|
+
if grpc_trace is not None:
|
|
381
|
+
settings.GRPC_TRACE = grpc_trace
|
|
382
|
+
|
|
383
|
+
# Confident tracing
|
|
384
|
+
if trace_verbose is not None:
|
|
385
|
+
settings.CONFIDENT_TRACE_VERBOSE = trace_verbose
|
|
386
|
+
if trace_env is not None:
|
|
387
|
+
settings.CONFIDENT_TRACE_ENVIRONMENT = trace_env
|
|
388
|
+
if trace_flush is not None:
|
|
389
|
+
settings.CONFIDENT_TRACE_FLUSH = trace_flush
|
|
390
|
+
|
|
391
|
+
# Advanced
|
|
392
|
+
if error_reporting is not None:
|
|
393
|
+
settings.ERROR_REPORTING = error_reporting
|
|
394
|
+
if ignore_errors is not None:
|
|
395
|
+
settings.IGNORE_DEEPEVAL_ERRORS = ignore_errors
|
|
396
|
+
|
|
397
|
+
handled, path, updated = edit_ctx.result
|
|
398
|
+
|
|
399
|
+
if not updated:
|
|
400
|
+
# no changes were made, so there is nothing to do.
|
|
401
|
+
return
|
|
402
|
+
|
|
403
|
+
if not handled and save is not None:
|
|
404
|
+
print("Unsupported --save option. Use --save=dotenv[:path].")
|
|
405
|
+
elif path:
|
|
406
|
+
print(
|
|
407
|
+
f"Saved environment variables to {path} (ensure it's git-ignored)."
|
|
408
|
+
)
|
|
409
|
+
else:
|
|
410
|
+
print(
|
|
411
|
+
"Settings updated for this session. To persist, use --save=dotenv[:path] "
|
|
412
|
+
"(default .env.local) or set DEEPEVAL_DEFAULT_SAVE=dotenv:.env.local"
|
|
413
|
+
)
|
|
414
|
+
|
|
415
|
+
print(":loud_sound: Debug options updated.")
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
@app.command(name="unset-debug")
|
|
419
|
+
def unset_debug(
|
|
420
|
+
save: Optional[str] = typer.Option(
|
|
421
|
+
None,
|
|
422
|
+
"--save",
|
|
423
|
+
help="Remove only the debug-related environment variables from a dotenv file. "
|
|
424
|
+
"Usage: --save=dotenv[:path] (default: .env.local)",
|
|
425
|
+
),
|
|
426
|
+
):
|
|
427
|
+
"""
|
|
428
|
+
Restore default behavior by unsetting debug related variables.
|
|
429
|
+
|
|
430
|
+
Behavior:
|
|
431
|
+
- Resets LOG_LEVEL back to 'info'.
|
|
432
|
+
- Unsets DEEPEVAL_VERBOSE_MODE, retry log-level overrides, gRPC and Confident trace flags.
|
|
433
|
+
- If --save is provided (or DEEPEVAL_DEFAULT_SAVE is set), removes these keys from the target dotenv file.
|
|
434
|
+
"""
|
|
435
|
+
settings = get_settings()
|
|
436
|
+
with settings.edit(save=save) as edit_ctx:
|
|
437
|
+
# Back to normal global level
|
|
438
|
+
settings.LOG_LEVEL = "info"
|
|
439
|
+
settings.CONFIDENT_TRACE_ENVIRONMENT = "development"
|
|
440
|
+
settings.CONFIDENT_TRACE_VERBOSE = True
|
|
441
|
+
|
|
442
|
+
# Clear optional toggles/overrides
|
|
443
|
+
settings.DEEPEVAL_VERBOSE_MODE = None
|
|
444
|
+
settings.DEEPEVAL_RETRY_BEFORE_LOG_LEVEL = None
|
|
445
|
+
settings.DEEPEVAL_RETRY_AFTER_LOG_LEVEL = None
|
|
446
|
+
|
|
447
|
+
settings.DEEPEVAL_GRPC_LOGGING = None
|
|
448
|
+
settings.GRPC_VERBOSITY = None
|
|
449
|
+
settings.GRPC_TRACE = None
|
|
450
|
+
|
|
451
|
+
settings.CONFIDENT_TRACE_FLUSH = None
|
|
452
|
+
|
|
453
|
+
settings.ERROR_REPORTING = None
|
|
454
|
+
settings.IGNORE_DEEPEVAL_ERRORS = None
|
|
294
455
|
|
|
295
456
|
handled, path, _ = edit_ctx.result
|
|
296
457
|
|
|
297
458
|
if not handled and save is not None:
|
|
298
|
-
# invalid --save format (unsupported)
|
|
299
459
|
print("Unsupported --save option. Use --save=dotenv[:path].")
|
|
460
|
+
elif path:
|
|
461
|
+
print(f"Removed debug-related environment variables from {path}.")
|
|
300
462
|
else:
|
|
301
|
-
print("
|
|
463
|
+
print("Debug settings reverted to defaults for this session.")
|
|
464
|
+
|
|
465
|
+
print(":mute: Debug options unset.")
|
|
302
466
|
|
|
303
467
|
|
|
304
468
|
#############################################
|
|
@@ -1336,7 +1500,7 @@ def set_gemini_model_env(
|
|
|
1336
1500
|
)
|
|
1337
1501
|
else:
|
|
1338
1502
|
print(
|
|
1339
|
-
|
|
1503
|
+
":raising_hands: Congratulations! You're now using Gemini's model for all evals that require an LLM."
|
|
1340
1504
|
)
|
|
1341
1505
|
|
|
1342
1506
|
|
|
@@ -458,6 +458,8 @@ class EvaluationDataset:
|
|
|
458
458
|
tools_called_col_delimiter: str = ";",
|
|
459
459
|
expected_tools_col_name: Optional[str] = "expected_tools",
|
|
460
460
|
expected_tools_col_delimiter: str = ";",
|
|
461
|
+
comments_key_name: str = "comments",
|
|
462
|
+
name_key_name: str = "name",
|
|
461
463
|
source_file_col_name: Optional[str] = None,
|
|
462
464
|
additional_metadata_col_name: Optional[str] = None,
|
|
463
465
|
scenario_col_name: Optional[str] = "scenario",
|
|
@@ -526,6 +528,8 @@ class EvaluationDataset:
|
|
|
526
528
|
df, expected_tools_col_name, default=""
|
|
527
529
|
)
|
|
528
530
|
]
|
|
531
|
+
comments = get_column_data(df, comments_key_name)
|
|
532
|
+
name = get_column_data(df, name_key_name)
|
|
529
533
|
source_files = get_column_data(df, source_file_col_name)
|
|
530
534
|
additional_metadatas = [
|
|
531
535
|
ast.literal_eval(metadata) if metadata else None
|
|
@@ -546,6 +550,8 @@ class EvaluationDataset:
|
|
|
546
550
|
retrieval_context,
|
|
547
551
|
tools_called,
|
|
548
552
|
expected_tools,
|
|
553
|
+
comments,
|
|
554
|
+
name,
|
|
549
555
|
source_file,
|
|
550
556
|
additional_metadata,
|
|
551
557
|
scenario,
|
|
@@ -560,6 +566,8 @@ class EvaluationDataset:
|
|
|
560
566
|
retrieval_contexts,
|
|
561
567
|
tools_called,
|
|
562
568
|
expected_tools,
|
|
569
|
+
comments,
|
|
570
|
+
name,
|
|
563
571
|
source_files,
|
|
564
572
|
additional_metadatas,
|
|
565
573
|
scenarios,
|
|
@@ -569,7 +577,7 @@ class EvaluationDataset:
|
|
|
569
577
|
):
|
|
570
578
|
if scenario:
|
|
571
579
|
self._multi_turn = True
|
|
572
|
-
parsed_turns = parse_turns(turns)
|
|
580
|
+
parsed_turns = parse_turns(turns) if turns else []
|
|
573
581
|
self.goldens.append(
|
|
574
582
|
ConversationalGolden(
|
|
575
583
|
scenario=scenario,
|
|
@@ -577,6 +585,8 @@ class EvaluationDataset:
|
|
|
577
585
|
expected_outcome=expected_outcome,
|
|
578
586
|
user_description=user_description,
|
|
579
587
|
context=context,
|
|
588
|
+
comments=comments,
|
|
589
|
+
name=name,
|
|
580
590
|
)
|
|
581
591
|
)
|
|
582
592
|
else:
|
|
@@ -592,6 +602,8 @@ class EvaluationDataset:
|
|
|
592
602
|
expected_tools=expected_tools,
|
|
593
603
|
additional_metadata=additional_metadata,
|
|
594
604
|
source_file=source_file,
|
|
605
|
+
comments=comments,
|
|
606
|
+
name=name,
|
|
595
607
|
)
|
|
596
608
|
)
|
|
597
609
|
|
|
@@ -605,6 +617,8 @@ class EvaluationDataset:
|
|
|
605
617
|
retrieval_context_key_name: Optional[str] = "retrieval_context",
|
|
606
618
|
tools_called_key_name: Optional[str] = "tools_called",
|
|
607
619
|
expected_tools_key_name: Optional[str] = "expected_tools",
|
|
620
|
+
comments_key_name: str = "comments",
|
|
621
|
+
name_key_name: str = "name",
|
|
608
622
|
source_file_key_name: Optional[str] = "source_file",
|
|
609
623
|
additional_metadata_key_name: Optional[str] = "additional_metadata",
|
|
610
624
|
scenario_key_name: Optional[str] = "scenario",
|
|
@@ -628,7 +642,8 @@ class EvaluationDataset:
|
|
|
628
642
|
expected_outcome = json_obj.get(expected_outcome_key_name)
|
|
629
643
|
user_description = json_obj.get(user_description_key_name)
|
|
630
644
|
context = json_obj.get(context_key_name)
|
|
631
|
-
|
|
645
|
+
comments = json_obj.get(comments_key_name)
|
|
646
|
+
name = json_obj.get(name_key_name)
|
|
632
647
|
parsed_turns = parse_turns(turns) if turns else []
|
|
633
648
|
|
|
634
649
|
self._multi_turn = True
|
|
@@ -639,6 +654,8 @@ class EvaluationDataset:
|
|
|
639
654
|
expected_outcome=expected_outcome,
|
|
640
655
|
user_description=user_description,
|
|
641
656
|
context=context,
|
|
657
|
+
comments=comments,
|
|
658
|
+
name=name,
|
|
642
659
|
)
|
|
643
660
|
)
|
|
644
661
|
else:
|
|
@@ -649,6 +666,8 @@ class EvaluationDataset:
|
|
|
649
666
|
retrieval_context = json_obj.get(retrieval_context_key_name)
|
|
650
667
|
tools_called = json_obj.get(tools_called_key_name)
|
|
651
668
|
expected_tools = json_obj.get(expected_tools_key_name)
|
|
669
|
+
comments = json_obj.get(comments_key_name)
|
|
670
|
+
name = json_obj.get(name_key_name)
|
|
652
671
|
source_file = json_obj.get(source_file_key_name)
|
|
653
672
|
additional_metadata = json_obj.get(additional_metadata_key_name)
|
|
654
673
|
|
|
@@ -663,6 +682,8 @@ class EvaluationDataset:
|
|
|
663
682
|
tools_called=tools_called,
|
|
664
683
|
expected_tools=expected_tools,
|
|
665
684
|
additional_metadata=additional_metadata,
|
|
685
|
+
comments=comments,
|
|
686
|
+
name=name,
|
|
666
687
|
source_file=source_file,
|
|
667
688
|
)
|
|
668
689
|
)
|
|
@@ -928,6 +949,8 @@ class EvaluationDataset:
|
|
|
928
949
|
expected_outcome=golden.expected_outcome,
|
|
929
950
|
user_description=golden.user_description,
|
|
930
951
|
context=golden.context,
|
|
952
|
+
name=golden.name,
|
|
953
|
+
comments=golden.comments,
|
|
931
954
|
)
|
|
932
955
|
for golden in self.goldens
|
|
933
956
|
]
|
|
@@ -939,6 +962,8 @@ class EvaluationDataset:
|
|
|
939
962
|
actual_output=golden.actual_output,
|
|
940
963
|
retrieval_context=golden.retrieval_context,
|
|
941
964
|
context=golden.context,
|
|
965
|
+
name=golden.name,
|
|
966
|
+
comments=golden.comments,
|
|
942
967
|
source_file=golden.source_file,
|
|
943
968
|
)
|
|
944
969
|
for golden in self.goldens
|
|
@@ -981,6 +1006,8 @@ class EvaluationDataset:
|
|
|
981
1006
|
"expected_outcome": golden.expected_outcome,
|
|
982
1007
|
"user_description": golden.user_description,
|
|
983
1008
|
"context": golden.context,
|
|
1009
|
+
"name": golden.name,
|
|
1010
|
+
"comments": golden.comments,
|
|
984
1011
|
}
|
|
985
1012
|
for golden in goldens
|
|
986
1013
|
]
|
|
@@ -992,6 +1019,8 @@ class EvaluationDataset:
|
|
|
992
1019
|
"expected_output": golden.expected_output,
|
|
993
1020
|
"retrieval_context": golden.retrieval_context,
|
|
994
1021
|
"context": golden.context,
|
|
1022
|
+
"name": golden.name,
|
|
1023
|
+
"comments": golden.comments,
|
|
995
1024
|
"source_file": golden.source_file,
|
|
996
1025
|
}
|
|
997
1026
|
for golden in goldens
|
|
@@ -1010,6 +1039,8 @@ class EvaluationDataset:
|
|
|
1010
1039
|
"expected_outcome",
|
|
1011
1040
|
"user_description",
|
|
1012
1041
|
"context",
|
|
1042
|
+
"name",
|
|
1043
|
+
"comments",
|
|
1013
1044
|
]
|
|
1014
1045
|
)
|
|
1015
1046
|
for golden in goldens:
|
|
@@ -1030,6 +1061,8 @@ class EvaluationDataset:
|
|
|
1030
1061
|
golden.expected_outcome,
|
|
1031
1062
|
golden.user_description,
|
|
1032
1063
|
context,
|
|
1064
|
+
golden.name,
|
|
1065
|
+
golden.comments,
|
|
1033
1066
|
]
|
|
1034
1067
|
)
|
|
1035
1068
|
else:
|
|
@@ -1040,6 +1073,8 @@ class EvaluationDataset:
|
|
|
1040
1073
|
"expected_output",
|
|
1041
1074
|
"retrieval_context",
|
|
1042
1075
|
"context",
|
|
1076
|
+
"name",
|
|
1077
|
+
"comments",
|
|
1043
1078
|
"source_file",
|
|
1044
1079
|
]
|
|
1045
1080
|
)
|
|
@@ -1061,6 +1096,8 @@ class EvaluationDataset:
|
|
|
1061
1096
|
golden.expected_output,
|
|
1062
1097
|
retrieval_context,
|
|
1063
1098
|
context,
|
|
1099
|
+
golden.name,
|
|
1100
|
+
golden.comments,
|
|
1064
1101
|
golden.source_file,
|
|
1065
1102
|
]
|
|
1066
1103
|
)
|
|
@@ -1219,12 +1219,16 @@ async def _a_execute_agentic_test_case(
|
|
|
1219
1219
|
|
|
1220
1220
|
test_case = LLMTestCase(
|
|
1221
1221
|
input=golden.input,
|
|
1222
|
-
actual_output=
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1222
|
+
actual_output=(
|
|
1223
|
+
str(current_trace.output)
|
|
1224
|
+
if current_trace.output is not None
|
|
1225
|
+
else None
|
|
1226
|
+
),
|
|
1227
|
+
expected_output=current_trace.expected_output,
|
|
1228
|
+
context=current_trace.context,
|
|
1229
|
+
retrieval_context=current_trace.retrieval_context,
|
|
1230
|
+
tools_called=current_trace.tools_called,
|
|
1231
|
+
expected_tools=current_trace.expected_tools,
|
|
1228
1232
|
additional_metadata=golden.additional_metadata,
|
|
1229
1233
|
comments=golden.comments,
|
|
1230
1234
|
name=golden.name,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""LLM evaluated metric based on the GEval framework: https://arxiv.org/pdf/2303.16634.pdf"""
|
|
2
2
|
|
|
3
|
-
from typing import Optional, List, Tuple, Union
|
|
3
|
+
from typing import Optional, List, Tuple, Type, Union
|
|
4
4
|
from deepeval.models import DeepEvalBaseMLLM
|
|
5
5
|
from deepeval.metrics import BaseMultimodalMetric
|
|
6
6
|
from deepeval.test_case import (
|
|
@@ -10,7 +10,10 @@ from deepeval.test_case import (
|
|
|
10
10
|
from deepeval.metrics.multimodal_metrics.multimodal_g_eval.template import (
|
|
11
11
|
MultimodalGEvalTemplate,
|
|
12
12
|
)
|
|
13
|
-
from deepeval.metrics.multimodal_metrics.multimodal_g_eval.schema import
|
|
13
|
+
from deepeval.metrics.multimodal_metrics.multimodal_g_eval.schema import (
|
|
14
|
+
Steps,
|
|
15
|
+
ReasonScore,
|
|
16
|
+
)
|
|
14
17
|
from deepeval.utils import get_or_create_event_loop, prettify_list
|
|
15
18
|
from deepeval.metrics.indicator import metric_progress_indicator
|
|
16
19
|
from deepeval.metrics.utils import (
|
|
@@ -49,6 +52,9 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
49
52
|
async_mode: bool = True,
|
|
50
53
|
strict_mode: bool = False,
|
|
51
54
|
verbose_mode: bool = False,
|
|
55
|
+
evaluation_template: Type[
|
|
56
|
+
MultimodalGEvalTemplate
|
|
57
|
+
] = MultimodalGEvalTemplate,
|
|
52
58
|
_include_g_eval_suffix: bool = True,
|
|
53
59
|
):
|
|
54
60
|
validate_criteria_and_evaluation_steps(criteria, evaluation_steps)
|
|
@@ -65,6 +71,7 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
65
71
|
self.async_mode = async_mode
|
|
66
72
|
self.verbose_mode = verbose_mode
|
|
67
73
|
self._include_g_eval_suffix = _include_g_eval_suffix
|
|
74
|
+
self.evaluation_template = evaluation_template
|
|
68
75
|
|
|
69
76
|
def measure(
|
|
70
77
|
self,
|
|
@@ -167,7 +174,7 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
167
174
|
g_eval_params_str = construct_g_eval_params_string(
|
|
168
175
|
self.evaluation_params
|
|
169
176
|
)
|
|
170
|
-
prompt =
|
|
177
|
+
prompt = self.evaluation_template.generate_evaluation_steps(
|
|
171
178
|
criteria=self.criteria, parameters=g_eval_params_str
|
|
172
179
|
)
|
|
173
180
|
if self.using_native_model:
|
|
@@ -190,7 +197,7 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
190
197
|
g_eval_params_str = construct_g_eval_params_string(
|
|
191
198
|
self.evaluation_params
|
|
192
199
|
)
|
|
193
|
-
prompt =
|
|
200
|
+
prompt = self.evaluation_template.generate_evaluation_steps(
|
|
194
201
|
criteria=self.criteria, parameters=g_eval_params_str
|
|
195
202
|
)
|
|
196
203
|
if self.using_native_model:
|
|
@@ -218,7 +225,7 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
218
225
|
|
|
219
226
|
if not self.strict_mode:
|
|
220
227
|
rubric_str = format_rubrics(self.rubric) if self.rubric else None
|
|
221
|
-
prompt =
|
|
228
|
+
prompt = self.evaluation_template.generate_evaluation_results(
|
|
222
229
|
evaluation_steps=number_evaluation_steps(self.evaluation_steps),
|
|
223
230
|
test_case_list=test_case_list,
|
|
224
231
|
parameters=g_eval_params_str,
|
|
@@ -227,11 +234,15 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
227
234
|
_additional_context=_additional_context,
|
|
228
235
|
)
|
|
229
236
|
else:
|
|
230
|
-
prompt =
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
237
|
+
prompt = (
|
|
238
|
+
self.evaluation_template.generate_strict_evaluation_results(
|
|
239
|
+
evaluation_steps=number_evaluation_steps(
|
|
240
|
+
self.evaluation_steps
|
|
241
|
+
),
|
|
242
|
+
test_case_list=test_case_list,
|
|
243
|
+
parameters=g_eval_params_str,
|
|
244
|
+
_additional_context=_additional_context,
|
|
245
|
+
)
|
|
235
246
|
)
|
|
236
247
|
try:
|
|
237
248
|
# don't use log probabilities for unsupported gpt models
|
|
@@ -256,7 +267,7 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
256
267
|
score, res
|
|
257
268
|
)
|
|
258
269
|
return weighted_summed_score, reason
|
|
259
|
-
except:
|
|
270
|
+
except Exception:
|
|
260
271
|
return score, reason
|
|
261
272
|
except (
|
|
262
273
|
AttributeError
|
|
@@ -289,7 +300,7 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
289
300
|
|
|
290
301
|
if not self.strict_mode:
|
|
291
302
|
rubric_str = format_rubrics(self.rubric) if self.rubric else None
|
|
292
|
-
prompt =
|
|
303
|
+
prompt = self.evaluation_template.generate_evaluation_results(
|
|
293
304
|
evaluation_steps=number_evaluation_steps(self.evaluation_steps),
|
|
294
305
|
test_case_list=test_case_list,
|
|
295
306
|
parameters=g_eval_params_str,
|
|
@@ -298,11 +309,15 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
298
309
|
_additional_context=_additional_context,
|
|
299
310
|
)
|
|
300
311
|
else:
|
|
301
|
-
prompt =
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
312
|
+
prompt = (
|
|
313
|
+
self.evaluation_template.generate_strict_evaluation_results(
|
|
314
|
+
evaluation_steps=number_evaluation_steps(
|
|
315
|
+
self.evaluation_steps
|
|
316
|
+
),
|
|
317
|
+
test_case_list=test_case_list,
|
|
318
|
+
parameters=g_eval_params_str,
|
|
319
|
+
_additional_context=_additional_context,
|
|
320
|
+
)
|
|
306
321
|
)
|
|
307
322
|
|
|
308
323
|
try:
|
|
@@ -326,7 +341,7 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
326
341
|
score, res
|
|
327
342
|
)
|
|
328
343
|
return weighted_summed_score, reason
|
|
329
|
-
except:
|
|
344
|
+
except Exception:
|
|
330
345
|
return score, reason
|
|
331
346
|
except AttributeError:
|
|
332
347
|
# This catches the case where a_generate_raw_response doesn't exist.
|
|
@@ -352,7 +367,7 @@ class MultimodalGEval(BaseMultimodalMetric):
|
|
|
352
367
|
else:
|
|
353
368
|
try:
|
|
354
369
|
self.success = self.score >= self.threshold
|
|
355
|
-
except:
|
|
370
|
+
except Exception:
|
|
356
371
|
self.success = False
|
|
357
372
|
return self.success
|
|
358
373
|
|
|
@@ -164,13 +164,12 @@ class _ObservedModel(Model):
|
|
|
164
164
|
):
|
|
165
165
|
|
|
166
166
|
if isinstance(event, ResponseCompletedEvent):
|
|
167
|
-
observer.result = (
|
|
168
|
-
event.response.
|
|
169
|
-
)
|
|
167
|
+
observer.result = make_json_serializable(
|
|
168
|
+
event.response.output
|
|
169
|
+
)
|
|
170
170
|
|
|
171
171
|
yield event
|
|
172
172
|
|
|
173
|
-
observer.__exit__(None, None, None)
|
|
174
173
|
except Exception as e:
|
|
175
174
|
observer.__exit__(type(e), e, e.__traceback__)
|
|
176
175
|
raise
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
from deepeval.tracing.tracing import (
|
|
2
2
|
Observer,
|
|
3
3
|
current_span_context,
|
|
4
|
+
trace_manager,
|
|
4
5
|
)
|
|
5
6
|
from deepeval.openai_agents.extractors import *
|
|
6
7
|
from deepeval.tracing.context import current_trace_context
|
|
8
|
+
from deepeval.tracing.utils import make_json_serializable
|
|
9
|
+
from time import perf_counter
|
|
10
|
+
from deepeval.tracing.types import TraceSpanStatus
|
|
7
11
|
|
|
8
12
|
try:
|
|
9
13
|
from agents.tracing import Span, Trace, TracingProcessor
|
|
@@ -33,14 +37,51 @@ def _check_openai_agents_available():
|
|
|
33
37
|
class DeepEvalTracingProcessor(TracingProcessor):
|
|
34
38
|
def __init__(self) -> None:
|
|
35
39
|
_check_openai_agents_available()
|
|
36
|
-
self.root_span_observers: dict[str, Observer] = {}
|
|
37
40
|
self.span_observers: dict[str, Observer] = {}
|
|
38
41
|
|
|
39
42
|
def on_trace_start(self, trace: "Trace") -> None:
|
|
40
|
-
|
|
43
|
+
trace_dict = trace.export()
|
|
44
|
+
_trace_uuid = trace_dict.get("id")
|
|
45
|
+
_thread_id = trace_dict.get("group_id")
|
|
46
|
+
_trace_name = trace_dict.get("workflow_name")
|
|
47
|
+
_trace_metadata = trace_dict.get("metadata")
|
|
48
|
+
|
|
49
|
+
if _thread_id or _trace_metadata:
|
|
50
|
+
_trace = trace_manager.start_new_trace(trace_uuid=str(_trace_uuid))
|
|
51
|
+
_trace.thread_id = str(_thread_id)
|
|
52
|
+
_trace.name = str(_trace_name)
|
|
53
|
+
_trace.metadata = make_json_serializable(_trace_metadata)
|
|
54
|
+
current_trace_context.set(_trace)
|
|
55
|
+
|
|
56
|
+
trace_manager.add_span( # adds a dummy root span
|
|
57
|
+
BaseSpan(
|
|
58
|
+
uuid=_trace_uuid,
|
|
59
|
+
trace_uuid=_trace_uuid,
|
|
60
|
+
parent_uuid=None,
|
|
61
|
+
start_time=perf_counter(),
|
|
62
|
+
name=_trace_name,
|
|
63
|
+
status=TraceSpanStatus.IN_PROGRESS,
|
|
64
|
+
children=[],
|
|
65
|
+
)
|
|
66
|
+
)
|
|
67
|
+
else:
|
|
68
|
+
current_trace = current_trace_context.get()
|
|
69
|
+
if current_trace:
|
|
70
|
+
current_trace.name = str(_trace_name)
|
|
41
71
|
|
|
42
72
|
def on_trace_end(self, trace: "Trace") -> None:
|
|
43
|
-
|
|
73
|
+
trace_dict = trace.export()
|
|
74
|
+
_trace_uuid = trace_dict.get("id")
|
|
75
|
+
_thread_id = trace_dict.get("group_id")
|
|
76
|
+
_trace_name = trace_dict.get("workflow_name")
|
|
77
|
+
_trace_metadata = trace_dict.get("metadata")
|
|
78
|
+
|
|
79
|
+
if _thread_id or _trace_metadata:
|
|
80
|
+
trace_manager.remove_span(
|
|
81
|
+
_trace_uuid
|
|
82
|
+
) # removing the dummy root span
|
|
83
|
+
trace_manager.end_trace(_trace_uuid)
|
|
84
|
+
current_trace_context.set(None)
|
|
44
85
|
|
|
45
86
|
def on_span_start(self, span: "Span") -> None:
|
|
46
87
|
if not span.started_at:
|