deepeval 3.7.8__tar.gz → 3.7.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.7.8 → deepeval-3.7.9}/PKG-INFO +5 -5
- {deepeval-3.7.8 → deepeval-3.7.9}/README.md +4 -4
- deepeval-3.7.9/deepeval/_version.py +1 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/drop/drop.py +5 -2
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/mmlu/mmlu.py +6 -4
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/cli/utils.py +2 -2
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/gemini_model.py +27 -29
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/synthesizer.py +190 -82
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/utils.py +21 -6
- {deepeval-3.7.8 → deepeval-3.7.9}/pyproject.toml +1 -1
- deepeval-3.7.8/deepeval/_version.py +0 -1
- {deepeval-3.7.8 → deepeval-3.7.9}/LICENSE.md +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/annotation/annotation.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/annotation/api.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/anthropic/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/anthropic/extractors.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/anthropic/patch.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/anthropic/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/cli/dotenv_handler.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/cli/main.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/cli/server.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/cli/test.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/cli/types.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/confident/api.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/confident/types.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/config/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/config/dotenv_handler.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/config/logging.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/config/settings.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/config/settings_manager.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/config/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/constants.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/contextvars.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/dataset/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/dataset/api.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/dataset/dataset.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/dataset/golden.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/dataset/test_run_tracer.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/dataset/types.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/errors.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/evaluate/configs.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/evaluate/evaluate.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/evaluate/execute.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/evaluate/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/crewai/subs.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/crewai/tool.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/crewai/wrapper.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/langchain/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/langchain/callback.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/langchain/patch.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/langchain/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/pydantic_ai/agent.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/pydantic_ai/instrumentator.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/pydantic_ai/otel.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/key_handler.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/answer_relevancy/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/api.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/argument_correctness/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/base_metric.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/bias/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_precision/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_recall/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_recall/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/contextual_relevancy/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversation_completeness/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversational_dag/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversational_dag/nodes.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversational_dag/templates.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/conversational_g_eval/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/dag/templates.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/exact_match/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/exact_match/exact_match.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/faithfulness/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/faithfulness/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/g_eval/g_eval.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/g_eval/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/goal_accuracy/goal_accuracy.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/goal_accuracy/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/goal_accuracy/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/hallucination/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/indicator.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/misuse/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/non_advice/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/pattern_match/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/pattern_match/pattern_match.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/pii_leakage/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/plan_adherence/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/plan_adherence/plan_adherence.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/plan_adherence/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/plan_adherence/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/plan_quality/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/plan_quality/plan_quality.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/plan_quality/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/plan_quality/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/prompt_alignment/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/ragas.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/role_adherence/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/role_violation/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/step_efficiency/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/step_efficiency/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/step_efficiency/step_efficiency.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/step_efficiency/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/task_completion/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/tool_correctness/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/tool_correctness/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/tool_correctness/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/tool_use/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/tool_use/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/tool_use/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/tool_use/tool_use.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/topic_adherence/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/topic_adherence/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/topic_adherence/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/topic_adherence/topic_adherence.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/toxicity/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_precision/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_precision/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_precision/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_recall/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_recall/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_recall/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_relevancy/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_relevancy/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_relevancy/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_faithfulness/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_faithfulness/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_faithfulness/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_faithfulness/turn_faithfulness.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_relevancy/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/metrics/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/model_integrations/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/model_integrations/types.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/model_integrations/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/base_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/embedding_models/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/anthropic_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/azure_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/constants.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/deepseek_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/grok_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/kimi_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/litellm_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/local_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/ollama_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/openai_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/portkey_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/retry_policy.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/openai/patch.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/openai/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/openai_agents/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/openai_agents/agent.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/openai_agents/callback_handler.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/openai_agents/extractors.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/openai_agents/patch.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/openai_agents/runner.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/base.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/configs.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/copro/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/copro/copro.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/gepa/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/gepa/gepa.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/miprov2/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/miprov2/bootstrapper.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/miprov2/miprov2.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/miprov2/proposer.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/simba/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/simba/simba.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/algorithms/simba/types.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/configs.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/policies.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/prompt_optimizer.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/rewriter/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/rewriter/rewriter.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/rewriter/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/scorer/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/scorer/base.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/scorer/scorer.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/scorer/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/types.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/optimizer/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/plugins/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/progress_context.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/prompt/api.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/prompt/prompt.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/prompt/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/py.typed +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/scorer/scorer.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/simulator/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/singleton.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/chunking/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/telemetry.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_case/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_case/api.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_case/conversational_test_case.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_case/llm_test_case.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_case/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_run/api.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_run/cache.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/test_run/test_run.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/api.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/context.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/otel/exporter.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/otel/test_exporter.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/otel/utils.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/patchers.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/trace_context.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/trace_test_manager.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/tracing.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/types.py +0 -0
- {deepeval-3.7.8 → deepeval-3.7.9}/deepeval/tracing/utils.py +0 -0
{deepeval-3.7.8 → deepeval-3.7.9}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deepeval
-Version: 3.7.8
+Version: 3.7.9
 Summary: The LLM Evaluation Framework
 Home-page: https://github.com/confident-ai/deepeval
 License: Apache-2.0
@@ -115,7 +115,7 @@ Whether your LLM applications are AI agents, RAG pipelines, or chatbots, impleme

 # 🔥 Metrics and Features

-> 🥳 You can now share DeepEval's test results on the cloud directly on [Confident AI](https://confident-ai.com?utm_source=GitHub)
+> 🥳 You can now share DeepEval's test results on the cloud directly on [Confident AI](https://confident-ai.com?utm_source=GitHub)

 - Supports both end-to-end and component-level LLM evaluation.
 - Large variety of ready-to-use LLM evaluation metrics (all with explanations) powered by **ANY** LLM of your choice, statistical methods, or NLP models that runs **locally on your machine**:
@@ -158,7 +158,7 @@ Whether your LLM applications are AI agents, RAG pipelines, or chatbots, impleme
 - TruthfulQA
 - HumanEval
 - GSM8K
-- [100% integrated with Confident AI](https://confident-ai.com?utm_source=GitHub) for the full evaluation lifecycle:
+- [100% integrated with Confident AI](https://confident-ai.com?utm_source=GitHub) for the full evaluation & observability lifecycle:
 - Curate/annotate evaluation datasets on the cloud
 - Benchmark LLM app using dataset, and compare with previous iterations to experiment which models/prompts works best
 - Fine-tune metrics for custom results
@@ -167,7 +167,7 @@ Whether your LLM applications are AI agents, RAG pipelines, or chatbots, impleme
 - Repeat until perfection

 > [!NOTE]
-> Confident AI …
+> DeepEval is available on Confident AI, an LLM evals platform for AI observability and quality. Create an account [here.](https://app.confident-ai.com?utm_source=GitHub)

 <br />

@@ -394,7 +394,7 @@ cp .env.example .env.local

 # DeepEval With Confident AI

-DeepEval …
+DeepEval is available on [Confident AI](https://confident-ai.com?utm_source=Github), an evals & observability platform that allows you to:

 1. Curate/annotate evaluation datasets on the cloud
 2. Benchmark LLM app using dataset, and compare with previous iterations to experiment which models/prompts works best
{deepeval-3.7.8 → deepeval-3.7.9}/README.md

@@ -68,7 +68,7 @@ Whether your LLM applications are AI agents, RAG pipelines, or chatbots, impleme

 # 🔥 Metrics and Features

-> 🥳 You can now share DeepEval's test results on the cloud directly on [Confident AI](https://confident-ai.com?utm_source=GitHub)
+> 🥳 You can now share DeepEval's test results on the cloud directly on [Confident AI](https://confident-ai.com?utm_source=GitHub)

 - Supports both end-to-end and component-level LLM evaluation.
 - Large variety of ready-to-use LLM evaluation metrics (all with explanations) powered by **ANY** LLM of your choice, statistical methods, or NLP models that runs **locally on your machine**:
@@ -111,7 +111,7 @@ Whether your LLM applications are AI agents, RAG pipelines, or chatbots, impleme
 - TruthfulQA
 - HumanEval
 - GSM8K
-- [100% integrated with Confident AI](https://confident-ai.com?utm_source=GitHub) for the full evaluation lifecycle:
+- [100% integrated with Confident AI](https://confident-ai.com?utm_source=GitHub) for the full evaluation & observability lifecycle:
 - Curate/annotate evaluation datasets on the cloud
 - Benchmark LLM app using dataset, and compare with previous iterations to experiment which models/prompts works best
 - Fine-tune metrics for custom results
@@ -120,7 +120,7 @@ Whether your LLM applications are AI agents, RAG pipelines, or chatbots, impleme
 - Repeat until perfection

 > [!NOTE]
-> Confident AI …
+> DeepEval is available on Confident AI, an LLM evals platform for AI observability and quality. Create an account [here.](https://app.confident-ai.com?utm_source=GitHub)

 <br />

@@ -347,7 +347,7 @@ cp .env.example .env.local

 # DeepEval With Confident AI

-DeepEval …
+DeepEval is available on [Confident AI](https://confident-ai.com?utm_source=Github), an evals & observability platform that allows you to:

 1. Curate/annotate evaluation datasets on the cloud
 2. Benchmark LLM app using dataset, and compare with previous iterations to experiment which models/prompts works best
deepeval-3.7.9/deepeval/_version.py

@@ -0,0 +1 @@
+__version__: str = "3.7.9"
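The new _version.py single-sources the package version as a typed module attribute. A minimal sketch of reading it, assuming only what the one-line module shown above defines:

    # The module literally defines __version__, so this import is safe;
    # whether deepeval also re-exports it at package level is not shown here.
    from deepeval._version import __version__

    print(__version__)  # "3.7.9"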
{deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/drop/drop.py

@@ -279,8 +279,11 @@ class DROP(DeepEvalBaseBenchmark):
             prediction = predictions[i]
             golden = goldens[i]
             # Define Metric
-            score = self.scorer.quasi_contains_score(
-                golden.expected_output, …
+            expected_output = DROPTemplate.parse_str_to_list(
+                golden.expected_output, DELIMITER
+            )
+            score = self.scorer.quasi_contains_score(
+                expected_output, prediction
             )
             res.append({"prediction": prediction, "score": score})

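Note on the drop.py hunk above: expected_output previously reached the scorer as a raw string; it is now split into a list of acceptable answers before quasi_contains_score checks the prediction against them. A minimal sketch of the implied behavior, assuming DELIMITER is a plain separator string and that the scorer normalizes both sides before comparing (both are assumptions; the real helpers live in DROPTemplate and deepeval's Scorer):

    from typing import List

    DELIMITER = ";"  # assumption: the real module defines its own constant


    def parse_str_to_list(text: str, delimiter: str) -> List[str]:
        # Split a delimiter-joined expected_output into candidate answers.
        return [part.strip() for part in text.split(delimiter) if part.strip()]


    def quasi_contains_score(expected: List[str], prediction: str) -> int:
        # Score 1 if the normalized prediction matches any candidate answer.
        def normalize(s: str) -> str:
            return s.strip().lower()

        return int(normalize(prediction) in {normalize(e) for e in expected})


    print(quasi_contains_score(parse_str_to_list("12; twelve", DELIMITER), "Twelve"))  # -> 1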
{deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/mmlu/mmlu.py

@@ -224,10 +224,12 @@ class MMLU(DeepEvalBaseBenchmark):
                 responses: List[MultipleChoiceSchema] = model.batch_generate(
                     prompts=prompts, schemas=[MultipleChoiceSchema for i in prompts]
                 )
-                if isinstance(responses, …
-                …
-                …
-                …
+                if not isinstance(responses, list):
+                    raise TypeError(
+                        "batch_generate must return List[MultipleChoiceSchema]"
+                    )
+
+                predictions = [res.answer for res in responses]
             except TypeError:
                 prompts = [
                     prompt
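Note on the mmlu.py hunk above: the new guard raises TypeError when batch_generate returns something other than a list, which deliberately lands in the surrounding except TypeError: handler that re-prompts without structured schemas. A self-contained sketch of that control flow, with stand-in functions (an assumption; the real code calls model.batch_generate with Pydantic schemas):

    from typing import Any, List


    def batch_generate(prompts: List[str]) -> Any:
        # Stand-in for a misbehaving custom model that ignores the schema contract.
        return "not a list"


    def run(prompts: List[str]) -> List[str]:
        try:
            responses = batch_generate(prompts)
            if not isinstance(responses, list):
                # Raising TypeError routes execution into the same
                # schema-free fallback the original except-branch provides.
                raise TypeError("batch_generate must return a list of schema objects")
            return [r.answer for r in responses]
        except TypeError:
            # Fallback path: re-prompt without structured schemas.
            return [f"(free-form answer for: {p})" for p in prompts]


    print(run(["2 + 2 = ?"]))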
{deepeval-3.7.8 → deepeval-3.7.9}/deepeval/cli/utils.py

@@ -52,10 +52,10 @@ USE_EMBED_KEYS = [

 def render_login_message():
     print(
-        "🥳 Welcome to [rgb(106,0,255)]Confident AI[/rgb(106,0,255)], the …
+        "🥳 Welcome to [rgb(106,0,255)]Confident AI[/rgb(106,0,255)], the evals cloud platform 🏡❤️"
     )
     print("")
-    print(pyfiglet.Figlet(font="big_money-ne").renderText("…
+    print(pyfiglet.Figlet(font="big_money-ne").renderText("Confident AI"))


 def upload_and_open_link(_span: Span):
{deepeval-3.7.8 → deepeval-3.7.9}/deepeval/models/llms/gemini_model.py

@@ -22,7 +22,7 @@ from deepeval.models.llms.constants import GEMINI_MODELS_DATA
 if TYPE_CHECKING:
     from google.genai import Client

-default_gemini_model = "gemini-…
+default_gemini_model = "gemini-2.5-pro"

 # consistent retry rules
 retry_gemini = create_retry_decorator(PS.GOOGLE)
@@ -371,25 +371,6 @@ class GeminiModel(DeepEvalBaseLLM):
         client_kwargs = self._client_kwargs(**self.kwargs)

         if self.should_use_vertexai():
-            service_account_key_json = require_secret_api_key(
-                self.service_account_key,
-                provider_label="Google Gemini",
-                env_var_name="GOOGLE_SERVICE_ACCOUNT_KEY",
-                param_hint="`service_account_key` to GeminiModel(...)",
-            )
-
-            try:
-                service_account_key = json.loads(service_account_key_json)
-            except Exception as e:
-                raise DeepEvalError(
-                    "GOOGLE_SERVICE_ACCOUNT_KEY must be valid JSON for a Google service account."
-                ) from e
-
-            if not isinstance(service_account_key, dict):
-                raise DeepEvalError(
-                    "GOOGLE_SERVICE_ACCOUNT_KEY must decode to a JSON object."
-                )
-
             if not self.project or not self.location:
                 raise DeepEvalError(
                     "When using Vertex AI API, both project and location are required. "
@@ -397,17 +378,34 @@
                     "GOOGLE_CLOUD_LOCATION in your DeepEval configuration."
                 )

-            … (three removed lines truncated in this view)
+            # if no service account key is provided, allow the SDK
+            # to resolve Application Default Credentials automatically.
+            credentials = None
+            if self.service_account_key is not None:
+                service_account_key_json = require_secret_api_key(
+                    self.service_account_key,
+                    provider_label="Google Gemini",
+                    env_var_name="GOOGLE_SERVICE_ACCOUNT_KEY",
+                    param_hint="`service_account_key` to GeminiModel(...)",
+                )
+
+                try:
+                    service_account_key = json.loads(service_account_key_json)
+                except Exception as e:
+                    raise DeepEvalError(
+                        "GOOGLE_SERVICE_ACCOUNT_KEY must be valid JSON for a Google service account."
+                    ) from e
+
+                if not isinstance(service_account_key, dict):
+                    raise DeepEvalError(
+                        "GOOGLE_SERVICE_ACCOUNT_KEY must decode to a JSON object."
+                    )
+
+                oauth2 = self._require_oauth2()
+                credentials = oauth2.service_account.Credentials.from_service_account_info(
                     service_account_key,
-                scopes=[
-                    "https://www.googleapis.com/auth/cloud-platform",
-                ],
+                    scopes=["https://www.googleapis.com/auth/cloud-platform"],
                 )
-            if service_account_key
-            else None
-            )

             client = self._module.Client(
                 vertexai=True,
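Net effect of the two gemini_model.py hunks: the default model moves to gemini-2.5-pro, and the service-account JSON is parsed only when a key is explicitly supplied; otherwise credentials stays None and the google-genai client resolves Application Default Credentials on its own. A hedged usage sketch (the service_account_key, project, and location names come from the diff, but the full GeminiModel signature is an assumption):

    from deepeval.models import GeminiModel

    # Vertex AI without an explicit key: the client now receives credentials=None,
    # so the SDK can fall back to Application Default Credentials
    # (e.g. after `gcloud auth application-default login`).
    model = GeminiModel(
        model_name="gemini-2.5-pro",  # also the new default per the diff
        project="my-gcp-project",     # placeholder project ID
        location="us-central1",       # placeholder region
    )

    # Passing service_account_key still works; it must be JSON that decodes
    # to an object, or GeminiModel raises DeepEvalError as before.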
{deepeval-3.7.8 → deepeval-3.7.9}/deepeval/synthesizer/synthesizer.py

@@ -1383,53 +1383,99 @@ class Synthesizer:
         # Prepare data for the DataFrame
         data = []

-        [47 removed lines; their content was not captured in this view]
+        if (
+            self.synthetic_goldens is not None
+            and len(self.synthetic_goldens) > 0
+        ):
+            for golden in self.synthetic_goldens:
+                # Extract basic fields
+                input_text = golden.input
+                expected_output = golden.expected_output
+                context = golden.context
+                actual_output = golden.actual_output
+                retrieval_context = golden.retrieval_context
+                metadata = golden.additional_metadata
+                source_file = golden.source_file
+
+                # Calculate num_context and context_length
+                if context is not None:
+                    num_context = len(context)
+                    context_length = sum(len(c) for c in context)
+                else:
+                    num_context = None
+                    context_length = None
+
+                # Handle metadata
+                if metadata is not None:
+                    evolutions = metadata.get("evolutions", None)
+                    synthetic_input_quality = metadata.get(
+                        "synthetic_input_quality", None
+                    )
+                    context_quality = metadata.get("context_quality", None)
+                else:
+                    evolutions = None
+                    synthetic_input_quality = None
+                    context_quality = None
+
+                # Prepare a row for the DataFrame
+                row = {
+                    "input": input_text,
+                    "actual_output": actual_output,
+                    "expected_output": expected_output,
+                    "context": context,
+                    "retrieval_context": retrieval_context,
+                    "n_chunks_per_context": num_context,
+                    "context_length": context_length,
+                    "evolutions": evolutions,
+                    "context_quality": context_quality,
+                    "synthetic_input_quality": synthetic_input_quality,
+                    "source_file": source_file,
+                }
+
+                # Append the row to the data list
+                data.append(row)
+        else:
+            for golden in self.synthetic_conversational_goldens:
+                # Extract basic fields
+                scenario = golden.scenario
+                expected_outcome = golden.expected_outcome
+                context = golden.context
+                metadata = golden.additional_metadata
+
+                # Calculate num_context and context_length
+                if context is not None:
+                    num_context = len(context)
+                    context_length = sum(len(c) for c in context)
+                else:
+                    num_context = None
+                    context_length = None
+
+                # Handle metadata
+                if metadata is not None:
+                    evolutions = metadata.get("evolutions", None)
+                    synthetic_scenario_quality = metadata.get(
+                        "synthetic_scenario_quality", None
+                    )
+                    source_files = metadata.get("source_files", None)
+                else:
+                    evolutions = None
+                    synthetic_scenario_quality = None
+                    source_files = None
+
+                # Prepare a row for the DataFrame
+                row = {
+                    "scenario": scenario,
+                    "expected_outcome": expected_outcome,
+                    "context": context,
+                    "n_chunks_per_context": num_context,
+                    "context_length": context_length,
+                    "evolutions": evolutions,
+                    "synthetic_scenario_quality": synthetic_scenario_quality,
+                    "source_files": source_files,
+                }
+
+                # Append the row to the data list
+                data.append(row)

         # Create the pandas DataFrame
         df = pd.DataFrame(data)
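The DataFrame export now branches on which kind of goldens were generated: single-turn goldens keep the input/expected_output schema, while conversational goldens produce scenario/expected_outcome rows. A quick sketch of the two resulting schemas (column names are taken from the hunk; the row values are invented):

import pandas as pd

# Single-turn golden row, as built in the first branch.
single_turn = pd.DataFrame([{
    "input": "What is RAG?",
    "expected_output": "Retrieval-augmented generation ...",
    "context": ["RAG combines retrieval with generation."],
    "n_chunks_per_context": 1,
}])

# Conversational golden row, as built in the else branch.
conversational = pd.DataFrame([{
    "scenario": "User disputes a charge.",
    "expected_outcome": "Agent resolves the dispute.",
    "context": None,
    "n_chunks_per_context": None,
}])

print(single_turn.columns.tolist())
print(conversational.columns.tolist())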
@@ -1479,7 +1525,10 @@ class Synthesizer:
                 "parameter."
             )

-        if
+        if (
+            len(self.synthetic_goldens) == 0
+            and len(self.synthetic_conversational_goldens) == 0
+        ):
             raise ValueError(
                 "No synthetic goldens found. Please generate goldens before saving goldens."
             )
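The guard is also sharpened: where the old check (truncated to a bare `if` in this view) keyed off a single list, saving now fails only when both the single-turn and conversational golden lists are empty, so a run that produced only conversational goldens can still be saved. In predicate form (a hypothetical standalone check, not DeepEval API):

def has_anything_to_save(goldens: list, conversational_goldens: list) -> bool:
    # Mirrors the new guard: saving is rejected only when both lists are empty.
    return len(goldens) > 0 or len(conversational_goldens) > 0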
@@ -1494,52 +1543,111 @@ class Synthesizer:
         full_file_path = os.path.join(directory, new_filename)
         if file_type == "json":
             with open(full_file_path, "w", encoding="utf-8") as file:
-                [10 removed lines; their content was not captured in this view]
+                if (
+                    self.synthetic_goldens is not None
+                    and len(self.synthetic_goldens) > 0
+                ):
+                    json_data = [
+                        {
+                            "input": golden.input,
+                            "actual_output": golden.actual_output,
+                            "expected_output": golden.expected_output,
+                            "context": golden.context,
+                            "source_file": golden.source_file,
+                        }
+                        for golden in self.synthetic_goldens
+                    ]
+                else:
+                    json_data = [
+                        {
+                            "scenario": golden.scenario,
+                            "expected_outcome": golden.expected_outcome,
+                            "context": golden.context,
+                            "source_files": golden.additional_metadata.get(
+                                "source_files", None
+                            ),
+                        }
+                        for golden in self.synthetic_conversational_goldens
+                    ]
                 json.dump(json_data, file, indent=4, ensure_ascii=False)
         elif file_type == "csv":
             with open(
                 full_file_path, "w", newline="", encoding="utf-8"
             ) as file:
                 writer = csv.writer(file)
-                [10 removed lines; their content was not captured in this view]
+                if (
+                    self.synthetic_goldens is not None
+                    and len(self.synthetic_goldens) > 0
+                ):
+                    writer.writerow(
+                        [
+                            "input",
+                            "actual_output",
+                            "expected_output",
+                            "context",
+                            "source_file",
+                        ]
+                    )
+                    for golden in self.synthetic_goldens:
+                        writer.writerow(
+                            [
+                                golden.input,
+                                golden.actual_output,
+                                golden.expected_output,
+                                "|".join(golden.context),
+                                golden.source_file,
+                            ]
+                        )
+                else:
                     writer.writerow(
                         [
-
-
-
-                            "
-                            golden.source_file,
+                            "scenario",
+                            "expected_outcome",
+                            "context",
+                            "source_files",
                         ]
                     )
+                    for golden in self.synthetic_conversational_goldens:
+                        writer.writerow(
+                            [
+                                golden.scenario,
+                                golden.expected_outcome,
+                                "|".join(golden.context),
+                                golden.additional_metadata.get(
+                                    "source_files", None
+                                ),
+                            ]
+                        )
         elif file_type == "jsonl":
             with open(full_file_path, "w", encoding="utf-8") as file:
-                [9 removed lines; their content was not captured in this view]
+                if (
+                    self.synthetic_goldens is not None
+                    and len(self.synthetic_goldens) > 0
+                ):
+                    for golden in self.synthetic_goldens:
+                        record = {
+                            "input": golden.input,
+                            "actual_output": golden.actual_output,
+                            "expected_output": golden.expected_output,
+                            "context": golden.context,
+                            "source_file": golden.source_file,
+                        }
+                        file.write(
+                            json.dumps(record, ensure_ascii=False) + "\n"
+                        )
+                else:
+                    for golden in self.synthetic_conversational_goldens:
+                        record = {
+                            "scenario": golden.scenario,
+                            "expected_outcome": golden.expected_outcome,
+                            "context": golden.context,
+                            "source_files": golden.additional_metadata.get(
+                                "source_files", None
+                            ),
+                        }
+                        file.write(
+                            json.dumps(record, ensure_ascii=False) + "\n"
+                        )
         if not quiet:
             print(f"Synthetic goldens saved at {full_file_path}!")

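All three writers now choose a record schema based on which golden list is populated; note that the CSV branch's `"|".join(golden.context)` still assumes every golden carries a non-None context. The JSONL branch reduces to the standard one-object-per-line pattern; a self-contained sketch (field names from the hunk; the sample data and file name are ours):

import json

records = [
    {
        "scenario": "User asks for a refund.",
        "expected_outcome": "Agent issues the refund politely.",
        "context": None,
        "source_files": None,
    },
]

with open("goldens.jsonl", "w", encoding="utf-8") as f:
    for record in records:
        # ensure_ascii=False keeps non-ASCII text readable on disk,
        # matching the json.dump/json.dumps calls in the diff.
        f.write(json.dumps(record, ensure_ascii=False) + "\n")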
{deepeval-3.7.8 → deepeval-3.7.9}/deepeval/utils.py

@@ -739,14 +739,29 @@ def update_pbar(
     if progress is None or pbar_id is None:
         return
     # Get amount to advance
-    current_task = next(t for t in progress.tasks if t.id == pbar_id)
+    current_task = next((t for t in progress.tasks if t.id == pbar_id), None)
+    if current_task is None:
+        return
+
     if advance_to_end:
-
+        remaining = current_task.remaining
+        if remaining is not None:
+            advance = remaining
+
     # Advance
-
-
-
-    progress.
+    try:
+        progress.update(pbar_id, advance=advance, total=total)
+    except KeyError:
+        # progress task may be removed concurrently via callbacks which can race with teardown.
+        return
+
+    # Remove if finished and refetch before remove to avoid acting on a stale object
+    updated_task = next((t for t in progress.tasks if t.id == pbar_id), None)
+    if updated_task is not None and updated_task.finished and remove:
+        try:
+            progress.remove_task(pbar_id)
+        except KeyError:
+            pass


 def add_pbar(progress: Optional[Progress], description: str, total: int = 1):
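The update_pbar rewrite is a defensive hardening of Rich progress handling: the bare `next(...)` (which raises StopIteration once the task is gone) becomes a defaulted lookup, `Progress.update` and `Progress.remove_task` are wrapped in KeyError guards because callbacks can remove tasks concurrently, and the task is re-fetched before removal so a stale object is never consulted. The same pattern in isolation (the `safe_advance` helper is illustrative, not DeepEval API):

from typing import Optional

from rich.progress import Progress, TaskID


def safe_advance(
    progress: Optional[Progress], task_id: Optional[TaskID], step: float = 1
) -> None:
    if progress is None or task_id is None:
        return
    # Defaulted next() instead of a bare one: a bare next() raises
    # StopIteration if the task was already removed.
    task = next((t for t in progress.tasks if t.id == task_id), None)
    if task is None:
        return
    try:
        progress.update(task_id, advance=step)
    except KeyError:
        # The task can be removed between the lookup and the update;
        # treat that race as a no-op.
        pass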
deepeval-3.7.8/deepeval/_version.py

@@ -1 +0,0 @@
-__version__: str = "3.7.8"
{deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py RENAMED (file without changes)
{deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt RENAMED (file without changes)
{deepeval-3.7.8 → deepeval-3.7.9}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt RENAMED (file without changes)