deepeval 3.5.6__tar.gz → 3.5.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.5.6 → deepeval-3.5.8}/PKG-INFO +2 -1
- deepeval-3.5.8/deepeval/_version.py +1 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/dataset/dataset.py +39 -2
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +34 -19
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai_agents/agent.py +3 -4
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/prompt/api.py +1 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/prompt/prompt.py +8 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/prompt/utils.py +10 -1
- {deepeval-3.5.6 → deepeval-3.5.8}/pyproject.toml +2 -1
- deepeval-3.5.6/deepeval/_version.py +0 -1
- {deepeval-3.5.6 → deepeval-3.5.8}/LICENSE.md +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/README.md +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/annotation/annotation.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/annotation/api.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/drop/drop.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/cli/dotenv_handler.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/cli/main.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/cli/server.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/cli/test.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/cli/types.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/cli/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/confident/api.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/confident/types.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/config/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/config/settings.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/config/settings_manager.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/config/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/constants.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/dataset/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/dataset/api.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/dataset/golden.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/dataset/test_run_tracer.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/dataset/types.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/errors.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/evaluate/configs.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/evaluate/evaluate.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/evaluate/execute.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/evaluate/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/crewai/agent.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/crewai/patch.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/langchain/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/langchain/callback.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/langchain/patch.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/langchain/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/llama_index/agent/patched.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/pydantic_ai/agent.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/pydantic_ai/otel.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/pydantic_ai/patcher.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/integrations/pydantic_ai/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/key_handler.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/answer_relevancy/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/argument_correctness/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/base_metric.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/bias/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_precision/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_recall/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_recall/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/contextual_relevancy/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversation_completeness/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversational_dag/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversational_dag/nodes.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversational_dag/templates.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/conversational_g_eval/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/dag/templates.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/faithfulness/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/faithfulness/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/g_eval/g_eval.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/g_eval/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/hallucination/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/indicator.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/misuse/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/non_advice/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/pii_leakage/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/prompt_alignment/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/ragas.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/role_adherence/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/role_violation/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/task_completion/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/tool_correctness/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/toxicity/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/turn_relevancy/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/base_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/embedding_models/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/anthropic_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/azure_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/deepseek_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/gemini_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/grok_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/kimi_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/litellm_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/local_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/ollama_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/openai_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/llms/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/mlllms/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/mlllms/gemini_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/mlllms/ollama_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/mlllms/openai_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/retry_policy.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/models/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai/patch.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai_agents/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai_agents/callback_handler.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai_agents/extractors.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai_agents/patch.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai_agents/runner.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/plugins/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/progress_context.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/py.typed +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/scorer/scorer.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/simulator/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/singleton.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/chunking/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/synthesizer.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/telemetry.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_case/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_case/conversational_test_case.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_case/llm_test_case.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_case/mllm_test_case.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_case/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_run/api.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_run/cache.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/test_run/test_run.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/api.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/context.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/otel/exporter.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/otel/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/patchers.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/tracing.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/types.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/tracing/utils.py +0 -0
- {deepeval-3.5.6 → deepeval-3.5.8}/deepeval/utils.py +0 -0
{deepeval-3.5.6 → deepeval-3.5.8}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deepeval
-Version: 3.5.6
+Version: 3.5.8
 Summary: The LLM Evaluation Framework
 Home-page: https://github.com/confident-ai/deepeval
 License: Apache-2.0
@@ -17,6 +17,7 @@ Requires-Dist: anthropic
 Requires-Dist: click (>=8.0.0,<8.3.0)
 Requires-Dist: google-genai (>=1.9.0,<2.0.0)
 Requires-Dist: grpcio (>=1.67.1,<2.0.0)
+Requires-Dist: jinja2
 Requires-Dist: nest_asyncio
 Requires-Dist: ollama
 Requires-Dist: openai
deepeval-3.5.8/deepeval/_version.py (new file)

@@ -0,0 +1 @@
+__version__: str = "3.5.8"
{deepeval-3.5.6 → deepeval-3.5.8}/deepeval/dataset/dataset.py

@@ -458,6 +458,8 @@ class EvaluationDataset:
         tools_called_col_delimiter: str = ";",
         expected_tools_col_name: Optional[str] = "expected_tools",
         expected_tools_col_delimiter: str = ";",
+        comments_key_name: str = "comments",
+        name_key_name: str = "name",
         source_file_col_name: Optional[str] = None,
         additional_metadata_col_name: Optional[str] = None,
         scenario_col_name: Optional[str] = "scenario",
@@ -526,6 +528,8 @@ class EvaluationDataset:
                 df, expected_tools_col_name, default=""
             )
         ]
+        comments = get_column_data(df, comments_key_name)
+        name = get_column_data(df, name_key_name)
         source_files = get_column_data(df, source_file_col_name)
         additional_metadatas = [
             ast.literal_eval(metadata) if metadata else None
@@ -546,6 +550,8 @@ class EvaluationDataset:
             retrieval_context,
             tools_called,
             expected_tools,
+            comments,
+            name,
             source_file,
             additional_metadata,
             scenario,
@@ -560,6 +566,8 @@ class EvaluationDataset:
             retrieval_contexts,
             tools_called,
             expected_tools,
+            comments,
+            name,
             source_files,
             additional_metadatas,
             scenarios,
@@ -569,7 +577,7 @@ class EvaluationDataset:
         ):
             if scenario:
                 self._multi_turn = True
-                parsed_turns = parse_turns(turns)
+                parsed_turns = parse_turns(turns) if turns else []
                 self.goldens.append(
                     ConversationalGolden(
                         scenario=scenario,
@@ -577,6 +585,8 @@ class EvaluationDataset:
                         expected_outcome=expected_outcome,
                         user_description=user_description,
                         context=context,
+                        comments=comments,
+                        name=name,
                     )
                 )
             else:
@@ -592,6 +602,8 @@ class EvaluationDataset:
                         expected_tools=expected_tools,
                         additional_metadata=additional_metadata,
                         source_file=source_file,
+                        comments=comments,
+                        name=name,
                     )
                 )

@@ -605,6 +617,8 @@ class EvaluationDataset:
         retrieval_context_key_name: Optional[str] = "retrieval_context",
         tools_called_key_name: Optional[str] = "tools_called",
         expected_tools_key_name: Optional[str] = "expected_tools",
+        comments_key_name: str = "comments",
+        name_key_name: str = "name",
         source_file_key_name: Optional[str] = "source_file",
         additional_metadata_key_name: Optional[str] = "additional_metadata",
         scenario_key_name: Optional[str] = "scenario",
@@ -628,7 +642,8 @@ class EvaluationDataset:
             expected_outcome = json_obj.get(expected_outcome_key_name)
             user_description = json_obj.get(user_description_key_name)
             context = json_obj.get(context_key_name)
-
+            comments = json_obj.get(comments_key_name)
+            name = json_obj.get(name_key_name)
             parsed_turns = parse_turns(turns) if turns else []

             self._multi_turn = True
@@ -639,6 +654,8 @@ class EvaluationDataset:
                     expected_outcome=expected_outcome,
                     user_description=user_description,
                     context=context,
+                    comments=comments,
+                    name=name,
                 )
             )
         else:
@@ -649,6 +666,8 @@ class EvaluationDataset:
             retrieval_context = json_obj.get(retrieval_context_key_name)
             tools_called = json_obj.get(tools_called_key_name)
             expected_tools = json_obj.get(expected_tools_key_name)
+            comments = json_obj.get(comments_key_name)
+            name = json_obj.get(name_key_name)
             source_file = json_obj.get(source_file_key_name)
             additional_metadata = json_obj.get(additional_metadata_key_name)

@@ -663,6 +682,8 @@ class EvaluationDataset:
                     tools_called=tools_called,
                     expected_tools=expected_tools,
                     additional_metadata=additional_metadata,
+                    comments=comments,
+                    name=name,
                     source_file=source_file,
                 )
             )
@@ -928,6 +949,8 @@ class EvaluationDataset:
                 expected_outcome=golden.expected_outcome,
                 user_description=golden.user_description,
                 context=golden.context,
+                name=golden.name,
+                comments=golden.comments,
             )
             for golden in self.goldens
         ]
@@ -939,6 +962,8 @@ class EvaluationDataset:
                 actual_output=golden.actual_output,
                 retrieval_context=golden.retrieval_context,
                 context=golden.context,
+                name=golden.name,
+                comments=golden.comments,
                 source_file=golden.source_file,
             )
             for golden in self.goldens
@@ -981,6 +1006,8 @@ class EvaluationDataset:
                 "expected_outcome": golden.expected_outcome,
                 "user_description": golden.user_description,
                 "context": golden.context,
+                "name": golden.name,
+                "comments": golden.comments,
             }
             for golden in goldens
         ]
@@ -992,6 +1019,8 @@ class EvaluationDataset:
                 "expected_output": golden.expected_output,
                 "retrieval_context": golden.retrieval_context,
                 "context": golden.context,
+                "name": golden.name,
+                "comments": golden.comments,
                 "source_file": golden.source_file,
             }
             for golden in goldens
@@ -1010,6 +1039,8 @@ class EvaluationDataset:
                 "expected_outcome",
                 "user_description",
                 "context",
+                "name",
+                "comments",
             ]
         )
         for golden in goldens:
@@ -1030,6 +1061,8 @@ class EvaluationDataset:
                     golden.expected_outcome,
                     golden.user_description,
                     context,
+                    golden.name,
+                    golden.comments,
                 ]
             )
         else:
@@ -1040,6 +1073,8 @@ class EvaluationDataset:
                 "expected_output",
                 "retrieval_context",
                 "context",
+                "name",
+                "comments",
                 "source_file",
             ]
         )
@@ -1061,6 +1096,8 @@ class EvaluationDataset:
                     golden.expected_output,
                     retrieval_context,
                     context,
+                    golden.name,
+                    golden.comments,
                     golden.source_file,
                 ]
             )
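Taken together, these hunks let goldens carry `name` and `comments` through CSV/JSON loading and through every save path. A minimal usage sketch, assuming the JSON loader method on `EvaluationDataset` is named `add_goldens_from_json_file` (the method whose keyword arguments are patched above) and a hypothetical local `goldens.json`:

```python
# Sketch only: "goldens.json" is a hypothetical local file; entries may now
# carry "name" and "comments" keys alongside the existing golden fields.
from deepeval.dataset import EvaluationDataset

dataset = EvaluationDataset()
dataset.add_goldens_from_json_file(
    file_path="goldens.json",
    comments_key_name="comments",  # new keyword in 3.5.8 (default shown)
    name_key_name="name",          # new keyword in 3.5.8 (default shown)
)

for golden in dataset.goldens:
    # Both fields also round-trip through the save paths patched above.
    print(golden.name, golden.comments)
```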
{deepeval-3.5.6 → deepeval-3.5.8}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py

@@ -1,6 +1,6 @@
 """LLM evaluated metric based on the GEval framework: https://arxiv.org/pdf/2303.16634.pdf"""

-from typing import Optional, List, Tuple, Union
+from typing import Optional, List, Tuple, Type, Union
 from deepeval.models import DeepEvalBaseMLLM
 from deepeval.metrics import BaseMultimodalMetric
 from deepeval.test_case import (
@@ -10,7 +10,10 @@ from deepeval.test_case import (
 from deepeval.metrics.multimodal_metrics.multimodal_g_eval.template import (
     MultimodalGEvalTemplate,
 )
-from deepeval.metrics.multimodal_metrics.multimodal_g_eval.schema import *
+from deepeval.metrics.multimodal_metrics.multimodal_g_eval.schema import (
+    Steps,
+    ReasonScore,
+)
 from deepeval.utils import get_or_create_event_loop, prettify_list
 from deepeval.metrics.indicator import metric_progress_indicator
 from deepeval.metrics.utils import (
@@ -49,6 +52,9 @@ class MultimodalGEval(BaseMultimodalMetric):
         async_mode: bool = True,
         strict_mode: bool = False,
         verbose_mode: bool = False,
+        evaluation_template: Type[
+            MultimodalGEvalTemplate
+        ] = MultimodalGEvalTemplate,
         _include_g_eval_suffix: bool = True,
     ):
         validate_criteria_and_evaluation_steps(criteria, evaluation_steps)
@@ -65,6 +71,7 @@ class MultimodalGEval(BaseMultimodalMetric):
         self.async_mode = async_mode
         self.verbose_mode = verbose_mode
         self._include_g_eval_suffix = _include_g_eval_suffix
+        self.evaluation_template = evaluation_template

     def measure(
         self,
@@ -167,7 +174,7 @@ class MultimodalGEval(BaseMultimodalMetric):
         g_eval_params_str = construct_g_eval_params_string(
             self.evaluation_params
         )
-        prompt = MultimodalGEvalTemplate.generate_evaluation_steps(
+        prompt = self.evaluation_template.generate_evaluation_steps(
             criteria=self.criteria, parameters=g_eval_params_str
         )
         if self.using_native_model:
@@ -190,7 +197,7 @@ class MultimodalGEval(BaseMultimodalMetric):
         g_eval_params_str = construct_g_eval_params_string(
             self.evaluation_params
         )
-        prompt = MultimodalGEvalTemplate.generate_evaluation_steps(
+        prompt = self.evaluation_template.generate_evaluation_steps(
             criteria=self.criteria, parameters=g_eval_params_str
         )
         if self.using_native_model:
@@ -218,7 +225,7 @@ class MultimodalGEval(BaseMultimodalMetric):

         if not self.strict_mode:
             rubric_str = format_rubrics(self.rubric) if self.rubric else None
-            prompt = MultimodalGEvalTemplate.generate_evaluation_results(
+            prompt = self.evaluation_template.generate_evaluation_results(
                 evaluation_steps=number_evaluation_steps(self.evaluation_steps),
                 test_case_list=test_case_list,
                 parameters=g_eval_params_str,
@@ -227,11 +234,15 @@ class MultimodalGEval(BaseMultimodalMetric):
                 _additional_context=_additional_context,
             )
         else:
-            prompt = MultimodalGEvalTemplate.generate_strict_evaluation_results(
-                evaluation_steps=number_evaluation_steps(self.evaluation_steps),
-                test_case_list=test_case_list,
-                parameters=g_eval_params_str,
-                _additional_context=_additional_context,
+            prompt = (
+                self.evaluation_template.generate_strict_evaluation_results(
+                    evaluation_steps=number_evaluation_steps(
+                        self.evaluation_steps
+                    ),
+                    test_case_list=test_case_list,
+                    parameters=g_eval_params_str,
+                    _additional_context=_additional_context,
+                )
             )
         try:
             # don't use log probabilities for unsupported gpt models
@@ -256,7 +267,7 @@ class MultimodalGEval(BaseMultimodalMetric):
                     score, res
                 )
                 return weighted_summed_score, reason
-            except:
+            except Exception:
                 return score, reason
         except (
             AttributeError
@@ -289,7 +300,7 @@ class MultimodalGEval(BaseMultimodalMetric):

         if not self.strict_mode:
             rubric_str = format_rubrics(self.rubric) if self.rubric else None
-            prompt = MultimodalGEvalTemplate.generate_evaluation_results(
+            prompt = self.evaluation_template.generate_evaluation_results(
                 evaluation_steps=number_evaluation_steps(self.evaluation_steps),
                 test_case_list=test_case_list,
                 parameters=g_eval_params_str,
@@ -298,11 +309,15 @@ class MultimodalGEval(BaseMultimodalMetric):
                 _additional_context=_additional_context,
             )
         else:
-            prompt = MultimodalGEvalTemplate.generate_strict_evaluation_results(
-                evaluation_steps=number_evaluation_steps(self.evaluation_steps),
-                test_case_list=test_case_list,
-                parameters=g_eval_params_str,
-                _additional_context=_additional_context,
+            prompt = (
+                self.evaluation_template.generate_strict_evaluation_results(
+                    evaluation_steps=number_evaluation_steps(
+                        self.evaluation_steps
+                    ),
+                    test_case_list=test_case_list,
+                    parameters=g_eval_params_str,
+                    _additional_context=_additional_context,
+                )
             )

         try:
@@ -326,7 +341,7 @@ class MultimodalGEval(BaseMultimodalMetric):
                     score, res
                 )
                 return weighted_summed_score, reason
-            except:
+            except Exception:
                 return score, reason
         except AttributeError:
             # This catches the case where a_generate_raw_response doesn't exist.
@@ -352,7 +367,7 @@ class MultimodalGEval(BaseMultimodalMetric):
         else:
             try:
                 self.success = self.score >= self.threshold
-            except:
+            except Exception:
                 self.success = False
             return self.success
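The new `evaluation_template` parameter makes prompt generation pluggable: any subclass of `MultimodalGEvalTemplate` overriding the three generator methods referenced above can be passed in. A hedged sketch; the `name`, `criteria`, and `evaluation_params` constructor arguments are assumed from the existing `MultimodalGEval` API:

```python
from deepeval.metrics import MultimodalGEval
from deepeval.metrics.multimodal_metrics.multimodal_g_eval.template import (
    MultimodalGEvalTemplate,
)
from deepeval.test_case import MLLMTestCaseParams


class TerseTemplate(MultimodalGEvalTemplate):
    """Hypothetical template that shortens the generated evaluation steps."""

    @staticmethod
    def generate_evaluation_steps(parameters, criteria):
        # Reuse the default prompt and prepend an extra instruction.
        base = MultimodalGEvalTemplate.generate_evaluation_steps(
            parameters=parameters, criteria=criteria
        )
        return "Keep every step under 15 words.\n" + base


metric = MultimodalGEval(
    name="Image Coherence",
    criteria="Does the generated image follow the text instructions?",
    evaluation_params=[
        MLLMTestCaseParams.INPUT,
        MLLMTestCaseParams.ACTUAL_OUTPUT,
    ],
    evaluation_template=TerseTemplate,  # new in 3.5.8; defaults to the base class
)
```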
{deepeval-3.5.6 → deepeval-3.5.8}/deepeval/openai_agents/agent.py

@@ -164,13 +164,12 @@ class _ObservedModel(Model):
         ):

             if isinstance(event, ResponseCompletedEvent):
-                observer.result = (
-                    event.response.
-                )
+                observer.result = make_json_serializable(
+                    event.response.output
+                )

             yield event

-            observer.__exit__(None, None, None)
         except Exception as e:
             observer.__exit__(type(e), e, e.__traceback__)
             raise
{deepeval-3.5.6 → deepeval-3.5.8}/deepeval/prompt/prompt.py

@@ -64,6 +64,10 @@ class Prompt:
             raise TypeError(
                 "Unable to create Prompt where 'alias' and 'template' are both None. Please provide at least one to continue."
             )
+        if template and messages_template:
+            raise TypeError(
+                "Unable to create Prompt where 'template' and 'messages_template' are both provided. Please provide only one to continue."
+            )

         self.alias = alias
         self._text_template = template
@@ -71,6 +75,10 @@ class Prompt:
         self._version = None
         self._polling_tasks: Dict[str, asyncio.Task] = {}
         self._refresh_map: Dict[str, int] = {}
+        if template:
+            self._type = PromptType.TEXT
+        elif messages_template:
+            self._type = PromptType.LIST

     @property
     def version(self):
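With this guard in place, a `Prompt` must be either text-based or message-based, never both, and its `_type` is inferred at construction. A small sketch, assuming `Prompt` is importable from `deepeval.prompt` and that `messages_template` accepts a list of role/content messages (the exact message shape is an assumption):

```python
from deepeval.prompt import Prompt

# Text prompt: _type is inferred as PromptType.TEXT.
greeting = Prompt(alias="greeting", template="Hello, {{ user }}!")

# Supplying both forms now fails fast with a TypeError (new in 3.5.8).
try:
    Prompt(
        alias="ambiguous",
        template="Hello!",
        messages_template=[{"role": "system", "content": "Hi"}],  # hypothetical shape
    )
except TypeError as exc:
    print(exc)
```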
{deepeval-3.5.6 → deepeval-3.5.8}/deepeval/prompt/utils.py

@@ -1,5 +1,7 @@
-from deepeval.prompt.api import PromptInterpolationType
 import re
+from jinja2 import Template
+
+from deepeval.prompt.api import PromptInterpolationType


 def interpolate_mustache(text: str, **kwargs) -> str:
@@ -25,6 +27,11 @@ def interpolate_dollar_brackets(text: str, **kwargs) -> str:
     return formatted_template.format(**kwargs)


+def interpolate_jinja(text: str, **kwargs) -> str:
+    template = Template(text)
+    return template.render(**kwargs)
+
+
 def interpolate_text(
     interpolation_type: PromptInterpolationType, text: str, **kwargs
 ) -> str:
@@ -37,5 +44,7 @@ def interpolate_text(
         return interpolate_fstring(text, **kwargs)
     elif interpolation_type == PromptInterpolationType.DOLLAR_BRACKETS:
         return interpolate_dollar_brackets(text, **kwargs)
+    elif interpolation_type == PromptInterpolationType.JINJA:
+        return interpolate_jinja(text, **kwargs)

     raise ValueError(f"Unsupported interpolation type: {interpolation_type}")
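The new branch wires Jinja rendering into the existing dispatcher, so `{{ ... }}` templates work through the same entry point as the mustache, f-string, and dollar-bracket styles. A minimal sketch using only names that appear in this diff:

```python
from deepeval.prompt.api import PromptInterpolationType
from deepeval.prompt.utils import interpolate_text

rendered = interpolate_text(
    PromptInterpolationType.JINJA,  # enum member referenced in the diff above
    "Hello {{ user }}, you have {{ count }} new results.",
    user="Ada",
    count=3,
)
print(rendered)  # -> Hello Ada, you have 3 new results.
```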
{deepeval-3.5.6 → deepeval-3.5.8}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "deepeval"
-version = "3.5.6"
+version = "3.5.8"
 description = "The LLM Evaluation Framework"
 authors = ["Jeffrey Ip <jeffreyip@confident-ai.com>"]
 license = "Apache-2.0"
@@ -48,6 +48,7 @@ pyfiglet = "*"
 python-dotenv = "^1.1.1"
 pydantic = "^2.11.7"
 pydantic-settings = "^2.10.1"
+jinja2 = "*"

 [tool.poetry.group.dev.dependencies]
 twine = "5.1.1"
deepeval-3.5.6/deepeval/_version.py (deleted)

@@ -1 +0,0 @@
-__version__: str = "3.5.6"