deepeval 3.8.0__tar.gz → 3.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.8.0 → deepeval-3.8.1}/PKG-INFO +1 -1
- deepeval-3.8.1/deepeval/_version.py +1 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/annotation.py +2 -2
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/settings.py +3 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/dataset.py +6 -4
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/callback.py +1 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/contextual_recall.py +25 -6
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/schema.py +6 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +10 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +10 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +10 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/utils.py +1 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/amazon_bedrock_model.py +51 -6
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/azure_model.py +33 -7
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/gemini_model.py +6 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/prompt.py +7 -5
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/llm_test_case.py +1 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/pyproject.toml +1 -1
- deepeval-3.8.0/deepeval/_version.py +0 -1
- {deepeval-3.8.0 → deepeval-3.8.1}/LICENSE.md +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/README.md +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/extractors.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/patch.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/anthropic/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/drop.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/dotenv_handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/main.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/server.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/test.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/cli/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/confident/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/confident/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/dotenv_handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/logging.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/settings_manager.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/constants.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/contextvars.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/golden.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/test_run_tracer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/errors.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/configs.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/evaluate.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/execute.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/evaluate/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/subs.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/tool.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/crewai/wrapper.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/patch.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/agent.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/instrumentator.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/otel.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/key_handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/base_metric.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/bias/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/nodes.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/templates.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/templates.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/exact_match/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/exact_match/exact_match.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/faithfulness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/g_eval.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/g_eval/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/goal_accuracy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/hallucination/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/indicator.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/misuse/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/non_advice/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pattern_match/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pattern_match/pattern_match.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/plan_adherence.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/plan_quality.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/plan_quality/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/ragas.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/role_violation/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/step_efficiency.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/tool_use/tool_use.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/topic_adherence.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/turn_faithfulness.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/model_integrations/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/model_integrations/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/model_integrations/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/base_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/anthropic_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/constants.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/deepseek_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/grok_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/kimi_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/litellm_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/local_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/ollama_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/openai_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/openrouter_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/portkey_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/llms/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/retry_policy.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/models/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/patch.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/agent.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/callback_handler.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/extractors.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/patch.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/openai_agents/runner.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/base.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/configs.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/copro/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/copro/copro.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/gepa/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/gepa/gepa.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/bootstrapper.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/miprov2.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/proposer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/simba.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/configs.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/policies.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/prompt_optimizer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/rewriter/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/rewriter/rewriter.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/rewriter/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/base.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/scorer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/scorer/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/optimizer/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/plugins/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/progress_context.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/prompt/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/py.typed +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/scorer/scorer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/simulator/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/singleton.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/chunking/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/synthesizer.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/telemetry.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/conversational_test_case.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_case/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/cache.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/test_run/test_run.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/context.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/exporter.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/test_exporter.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/otel/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/patchers.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/trace_context.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/trace_test_manager.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/tracing.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/types.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/tracing/utils.py +0 -0
- {deepeval-3.8.0 → deepeval-3.8.1}/deepeval/utils.py +0 -0
deepeval-3.8.1/deepeval/_version.py

```diff
@@ -0,0 +1 @@
+__version__: str = "3.8.1"
```
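A quick way to confirm an installed build picked up the new module (a minimal check, assuming deepeval 3.8.1 is installed):

```python
# the module added in 3.8.1 exposes the version as a plain string attribute
from deepeval._version import __version__

assert __version__ == "3.8.1"
```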
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/annotation/annotation.py

```diff
@@ -14,7 +14,7 @@ def send_annotation(
     explanation: Optional[str] = None,
     user_id: Optional[str] = None,
     type: Optional[AnnotationType] = AnnotationType.THUMBS_RATING,
-) ->
+) -> None:
     api_annotation = APIAnnotation(
         rating=rating,
         traceUuid=trace_uuid,

@@ -50,7 +50,7 @@ async def a_send_annotation(
     explanation: Optional[str] = None,
     type: Optional[AnnotationType] = AnnotationType.THUMBS_RATING,
     user_id: Optional[str] = None,
-) ->
+) -> None:
     api_annotation = APIAnnotation(
         rating=rating,
         traceUuid=trace_uuid,
```
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/config/settings.py

```diff
@@ -447,6 +447,9 @@ class Settings(BaseSettings):
     AZURE_OPENAI_API_KEY: Optional[SecretStr] = Field(
         None, description="Azure OpenAI API key."
     )
+    AZURE_OPENAI_AD_TOKEN: Optional[SecretStr] = Field(
+        None, description="Azure OpenAI Ad Token."
+    )
     AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = Field(
         None, description="Azure OpenAI endpoint URL."
     )
```
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/dataset/dataset.py

```diff
@@ -84,9 +84,11 @@ class EvaluationDataset:
     def __init__(
         self,
         goldens: Union[List[Golden], List[ConversationalGolden]] = [],
+        confident_api_key: Optional[str] = None,
     ):
         self._alias = None
         self._id = None
+        self.confident_api_key = confident_api_key
         if len(goldens) > 0:
             self._multi_turn = (
                 True if isinstance(goldens[0], ConversationalGolden) else False

@@ -722,7 +724,7 @@ class EvaluationDataset:
                 "Unable to push empty dataset to Confident AI, there must be at least one golden in dataset."
             )

-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         api_dataset = APIDataset(
             goldens=self.goldens if not self._multi_turn else None,
             conversationalGoldens=(self.goldens if self._multi_turn else None),

@@ -755,7 +757,7 @@ class EvaluationDataset:
         auto_convert_goldens_to_test_cases: bool = False,
         public: bool = False,
     ):
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         with capture_pull_dataset():
             with Progress(
                 SpinnerColumn(style="rgb(106,0,255)"),

@@ -839,7 +841,7 @@ class EvaluationDataset:
             raise ValueError(
                 f"Can't queue empty list of goldens to dataset with alias: {alias} on Confident AI."
             )
-        api = Api()
+        api = Api(api_key=self.confident_api_key)

         multi_turn = isinstance(goldens[0], ConversationalGolden)

@@ -871,7 +873,7 @@ class EvaluationDataset:
         self,
         alias: str,
     ):
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         api.send_request(
             method=HttpMethods.DELETE,
             endpoint=Endpoints.DATASET_ALIAS_ENDPOINT,
```
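Taken together, these hunks thread a per-instance Confident AI key through every `Api()` construction in the class (push, pull, queue, delete). A minimal sketch of the new constructor argument; the key and alias values are placeholders, and `Golden(input=...)` plus `push(alias=...)` are assumed from the surrounding API rather than shown in this diff:

```python
from deepeval.dataset import EvaluationDataset, Golden

dataset = EvaluationDataset(
    goldens=[Golden(input="What does deepeval do?")],
    confident_api_key="<confident-api-key>",  # placeholder
)
dataset.push(alias="my-dataset")  # internally: Api(api_key=self.confident_api_key)
```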
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/integrations/langchain/callback.py

```diff
@@ -539,4 +539,4 @@ class CallbackHandler(BaseCallbackHandler):
         with self._ctx(run_id=run_id, parent_run_id=parent_run_id):
             retriever_span.status = TraceSpanStatus.ERRORED
             retriever_span.error = str(error)
-            exit_current_context(uuid_str=uuid_str)
+            exit_current_context(uuid_str=uuid_str)
```
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/contextual_recall.py

```diff
@@ -23,6 +23,7 @@ from deepeval.metrics.contextual_recall.schema import (
     ContextualRecallVerdict,
     Verdicts,
     ContextualRecallScoreReason,
+    VerdictWithExpectedOutput,
 )
 from deepeval.metrics.api import metric_data_manager

@@ -93,7 +94,7 @@ class ContextualRecallMetric(BaseMetric):
         expected_output = test_case.expected_output
         retrieval_context = test_case.retrieval_context

-        self.verdicts: List[
+        self.verdicts: List[VerdictWithExpectedOutput] = (
             self._generate_verdicts(
                 expected_output, retrieval_context, multimodal
             )

@@ -144,7 +145,7 @@ class ContextualRecallMetric(BaseMetric):
         expected_output = test_case.expected_output
         retrieval_context = test_case.retrieval_context

-        self.verdicts: List[
+        self.verdicts: List[VerdictWithExpectedOutput] = (
             await self._a_generate_verdicts(
                 expected_output, retrieval_context, multimodal
             )

@@ -241,13 +242,13 @@ class ContextualRecallMetric(BaseMetric):
         expected_output: str,
         retrieval_context: List[str],
         multimodal: bool,
-    ) -> List[
+    ) -> List[VerdictWithExpectedOutput]:
         prompt = self.evaluation_template.generate_verdicts(
             expected_output=expected_output,
             retrieval_context=retrieval_context,
             multimodal=multimodal,
         )
-
+        verdicts = await a_generate_with_schema_and_extract(
             metric=self,
             prompt=prompt,
             schema_cls=Verdicts,

@@ -256,19 +257,28 @@ class ContextualRecallMetric(BaseMetric):
                 ContextualRecallVerdict(**item) for item in data["verdicts"]
             ],
         )
+        final_verdicts = []
+        for verdict in verdicts:
+            new_verdict = VerdictWithExpectedOutput(
+                verdict=verdict.verdict,
+                reason=verdict.reason,
+                expected_output=expected_output,
+            )
+            final_verdicts.append(new_verdict)
+        return final_verdicts

     def _generate_verdicts(
         self,
         expected_output: str,
         retrieval_context: List[str],
         multimodal: bool,
-    ) -> List[
+    ) -> List[VerdictWithExpectedOutput]:
         prompt = self.evaluation_template.generate_verdicts(
             expected_output=expected_output,
             retrieval_context=retrieval_context,
             multimodal=multimodal,
         )
-
+        verdicts = generate_with_schema_and_extract(
             metric=self,
             prompt=prompt,
             schema_cls=Verdicts,

@@ -277,6 +287,15 @@ class ContextualRecallMetric(BaseMetric):
                 ContextualRecallVerdict(**item) for item in data["verdicts"]
             ],
         )
+        final_verdicts = []
+        for verdict in verdicts:
+            new_verdict = VerdictWithExpectedOutput(
+                verdict=verdict.verdict,
+                reason=verdict.reason,
+                expected_output=expected_output,
+            )
+            final_verdicts.append(new_verdict)
+        return final_verdicts

     def is_successful(self) -> bool:
         if self.error is not None:
```
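The metric now wraps each `ContextualRecallVerdict` into a `VerdictWithExpectedOutput` that also carries the expected output. The companion `schema.py` change (+6 lines) is not shown in this excerpt; a hypothetical reconstruction of that model, using only the three field names the metric populates above:

```python
from typing import Optional

from pydantic import BaseModel


# hypothetical shape; only the field names come from the hunks above
class VerdictWithExpectedOutput(BaseModel):
    verdict: str
    reason: Optional[str] = None
    expected_output: str
```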
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py

```diff
@@ -85,7 +85,12 @@ class ImageCoherenceMetric(BaseMetric):
         self.contexts_below = []
         self.scores = []
         self.reasons = []
-
+        image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
+        for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
             )

@@ -188,6 +193,10 @@ class ImageCoherenceMetric(BaseMetric):

         tasks = []
         image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
         for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
```
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py

```diff
@@ -86,7 +86,12 @@ class ImageHelpfulnessMetric(BaseMetric):
         self.contexts_below = []
         self.scores = []
         self.reasons = []
-
+        image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
+        for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
             )

@@ -189,6 +194,10 @@ class ImageHelpfulnessMetric(BaseMetric):

         tasks = []
         image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
         for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
```
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py

```diff
@@ -86,7 +86,12 @@ class ImageReferenceMetric(BaseMetric):
         self.contexts_below = []
         self.scores = []
         self.reasons = []
-
+        image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
+        for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
             )

@@ -189,6 +194,10 @@ class ImageReferenceMetric(BaseMetric):

         tasks = []
         image_indices = self.get_image_indices(actual_output)
+        if not image_indices:
+            raise ValueError(
+                f"The test case must have atleast one image in the `actual_output` to calculate {self.__name__} score"
+            )
         for image_index in image_indices:
             context_above, context_below = self.get_image_context(
                 image_index, actual_output
```
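The same fail-fast guard lands in both the sync and async paths of all three image metrics (coherence, helpfulness, reference). A sketch of what it produces for a test case whose `actual_output` contains no `MLLMImage` elements; the import path and the test case are assumptions, not shown in this diff:

```python
from deepeval.metrics import ImageCoherenceMetric  # import path is an assumption

metric = ImageCoherenceMetric()
try:
    metric.measure(text_only_test_case)  # hypothetical test case without images
except ValueError as err:
    print(err)  # "The test case must have atleast one image in the `actual_output` ..."
```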
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/metrics/utils.py

```diff
@@ -312,7 +312,7 @@ def check_llm_test_case_params(
         if isinstance(ele, MLLMImage):
             count += 1
     if count != actual_output_image_count:
-        error_str = f"
+        error_str = f"Can only evaluate test cases with '{actual_output_image_count}' output images using the '{metric.__name__}' metric. `{count}` found."
         raise ValueError(error_str)

     if isinstance(test_case, LLMTestCase) is False:
```
@@ -14,6 +14,7 @@ from deepeval.models.retry_policy import (
|
|
|
14
14
|
sdk_retries_for,
|
|
15
15
|
)
|
|
16
16
|
from deepeval.test_case import MLLMImage
|
|
17
|
+
from deepeval.errors import DeepEvalError
|
|
17
18
|
from deepeval.utils import check_if_multimodal, convert_to_multi_modal_array
|
|
18
19
|
from deepeval.models import DeepEvalBaseLLM
|
|
19
20
|
from deepeval.models.llms.constants import BEDROCK_MODELS_DATA
|
|
@@ -155,27 +156,28 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
 
     def generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, BaseModel], float]:
+    ) -> Tuple[Union[str, BaseModel], Optional[float]]:
         return safe_asyncio_run(self.a_generate(prompt, schema))
 
     @retry_bedrock
     async def a_generate(
         self, prompt: str, schema: Optional[BaseModel] = None
-    ) -> Tuple[Union[str, BaseModel], float]:
+    ) -> Tuple[Union[str, BaseModel], Optional[float]]:
         if check_if_multimodal(prompt):
             prompt = convert_to_multi_modal_array(input=prompt)
             payload = self.generate_payload(prompt)
         else:
             payload = self.get_converse_request_body(prompt)
 
-        payload = self.get_converse_request_body(prompt)
         client = await self._ensure_client()
         response = await client.converse(
             modelId=self.get_model_name(),
             messages=payload["messages"],
             inferenceConfig=payload["inferenceConfig"],
         )
-
+
+        message = self._extract_text_from_converse_response(response)
+
         cost = self.calculate_cost(
             response["usage"]["inputTokens"],
             response["usage"]["outputTokens"],
@@ -206,7 +208,7 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
             try:
                 image_raw_bytes = base64.b64decode(element.dataBase64)
             except Exception:
-                raise
+                raise DeepEvalError(
                     f"Invalid base64 data in MLLMImage: {element._id}"
                 )
 
@@ -294,6 +296,46 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
     # Helpers
     ###############################################
 
+    @staticmethod
+    def _extract_text_from_converse_response(response: dict) -> str:
+        try:
+            content = response["output"]["message"]["content"]
+        except Exception as e:
+            raise DeepEvalError(
+                "Missing output.message.content in Bedrock response"
+            ) from e
+
+        # Collect any text blocks (ignore reasoning/tool blocks)
+        text_parts = []
+        for block in content:
+            if isinstance(block, dict) and "text" in block:
+                v = block.get("text")
+                if isinstance(v, str) and v.strip():
+                    text_parts.append(v)
+
+        if text_parts:
+            # join in case there are multiple text blocks
+            return "\n".join(text_parts)
+
+        # No text blocks present; raise an actionable error
+        keys = []
+        for b in content:
+            if isinstance(b, dict):
+                keys.append(list(b.keys()))
+            else:
+                keys.append(type(b).__name__)
+
+        stop_reason = (
+            response.get("stopReason")
+            or response.get("output", {}).get("stopReason")
+            or response.get("output", {}).get("message", {}).get("stopReason")
+        )
+
+        raise DeepEvalError(
+            f"Bedrock response contained no text content blocks. "
+            f"content keys={keys}, stopReason={stop_reason}"
+        )
+
     def get_converse_request_body(self, prompt: str) -> dict:
 
         return {
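The Converse API returns `output.message.content` as a list of blocks that can mix text with reasoning or tool-use entries, which is why the helper filters rather than blindly indexing `content[0]["text"]`. A standalone sketch of the same filtering logic run against a hand-written response payload (the payload values are illustrative, and ValueError stands in for DeepEvalError):

    def extract_text(response: dict) -> str:
        # Same filtering as _extract_text_from_converse_response above.
        content = response["output"]["message"]["content"]
        text_parts = [
            block["text"]
            for block in content
            if isinstance(block, dict)
            and isinstance(block.get("text"), str)
            and block["text"].strip()
        ]
        if not text_parts:
            raise ValueError("Bedrock response contained no text content blocks")
        return "\n".join(text_parts)

    # A typical Converse response: a reasoning block followed by a text block.
    response = {
        "output": {
            "message": {
                "content": [
                    {"reasoningContent": {"reasoningText": {"text": "thinking..."}}},
                    {"text": "The answer is 42."},
                ]
            }
        },
        "stopReason": "end_turn",
        "usage": {"inputTokens": 12, "outputTokens": 8},
    }

    assert extract_text(response) == "The answer is 42."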
@@ -303,11 +345,14 @@ class AmazonBedrockModel(DeepEvalBaseLLM):
             },
         }
 
-    def calculate_cost(
+    def calculate_cost(
+        self, input_tokens: int, output_tokens: int
+    ) -> Optional[float]:
         if self.model_data.input_price and self.model_data.output_price:
             input_cost = input_tokens * self.model_data.input_price
             output_cost = output_tokens * self.model_data.output_price
             return input_cost + output_cost
+        return None
 
     def load_model(self):
         pass
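The explicit `return None` (matching the new `Optional[float]` on `generate`/`a_generate`) lets callers distinguish "pricing metadata unavailable" from a genuine zero cost. A quick sketch of the same arithmetic with illustrative per-token USD prices:

    from typing import Optional

    def cost_of(
        input_tokens: int,
        output_tokens: int,
        input_price: Optional[float],
        output_price: Optional[float],
    ) -> Optional[float]:
        # Mirrors calculate_cost: None when pricing metadata is missing.
        if input_price and output_price:
            return input_tokens * input_price + output_tokens * output_price
        return None

    print(cost_of(1_000, 200, 3e-06, 1.5e-05))  # 0.006
    print(cost_of(1_000, 200, None, None))      # None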
@@ -1,6 +1,6 @@
 from openai.types.chat.chat_completion import ChatCompletion
 from openai import AzureOpenAI, AsyncAzureOpenAI
-from typing import Optional, Tuple, Union, Dict, List
+from typing import Optional, Tuple, Union, Dict, List, Callable, Awaitable
 from pydantic import BaseModel, SecretStr
 
 from deepeval.errors import DeepEvalError
@@ -42,6 +42,10 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model: Optional[str] = None,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
+        azure_ad_token_provider: Optional[
+            Callable[[], "str | Awaitable[str]"]
+        ] = None,
+        azure_ad_token: Optional[str] = None,
         temperature: Optional[float] = None,
         cost_per_input_token: Optional[float] = None,
         cost_per_output_token: Optional[float] = None,
@@ -67,12 +71,19 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         model = model or settings.AZURE_MODEL_NAME
         deployment_name = deployment_name or settings.AZURE_DEPLOYMENT_NAME
 
+        self.azure_ad_token_provider = azure_ad_token_provider
+
         if api_key is not None:
             # keep it secret, keep it safe from serializings, logging and alike
             self.api_key: Optional[SecretStr] = SecretStr(api_key)
         else:
             self.api_key = settings.AZURE_OPENAI_API_KEY
 
+        if azure_ad_token is not None:
+            self.azure_ad_token = azure_ad_token
+        else:
+            self.azure_ad_token = settings.AZURE_OPENAI_AD_TOKEN
+
         api_version = api_version or settings.OPENAI_API_VERSION
         if base_url is not None:
             base_url = str(base_url).rstrip("/")
@@ -431,18 +442,33 @@ class AzureOpenAIModel(DeepEvalBaseLLM):
         return kwargs
 
     def _build_client(self, cls):
-
-
-
-
-
-
+        # Only require the API key / Azure ad token if no token provider is supplied
+        azure_ad_token = None
+        api_key = None
+
+        if self.azure_ad_token_provider is None:
+            if self.azure_ad_token is not None:
+                azure_ad_token = require_secret_api_key(
+                    self.azure_ad_token,
+                    provider_label="AzureOpenAI",
+                    env_var_name="AZURE_OPENAI_AD_TOKEN",
+                    param_hint="`azure_ad_token` to AzureOpenAIModel(...)",
+                )
+            else:
+                api_key = require_secret_api_key(
+                    self.api_key,
+                    provider_label="AzureOpenAI",
+                    env_var_name="AZURE_OPENAI_API_KEY",
+                    param_hint="`api_key` to AzureOpenAIModel(...)",
+                )
 
         kw = dict(
             api_key=api_key,
             api_version=self.api_version,
             azure_endpoint=self.base_url,
             azure_deployment=self.deployment_name,
+            azure_ad_token_provider=self.azure_ad_token_provider,
+            azure_ad_token=azure_ad_token,
             **self._client_kwargs(),
         )
         try:
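With `_build_client` skipping the credential check whenever a token provider is supplied, AzureOpenAIModel can authenticate via Microsoft Entra ID with no API key at all. A minimal sketch; the `azure.identity` calls are the standard way to build such a provider, while the `deepeval.models` import path and the endpoint/deployment values are assumptions:

    from azure.identity import DefaultAzureCredential, get_bearer_token_provider
    from deepeval.models import AzureOpenAIModel  # assumed import path

    # Fetches short-lived bearer tokens on demand; no API key involved.
    token_provider = get_bearer_token_provider(
        DefaultAzureCredential(),
        "https://cognitiveservices.azure.com/.default",
    )

    model = AzureOpenAIModel(
        deployment_name="gpt-4o-eval",                    # placeholder deployment
        base_url="https://my-resource.openai.azure.com",  # placeholder endpoint
        azure_ad_token_provider=token_provider,
    )

A static `azure_ad_token` (or the `AZURE_OPENAI_AD_TOKEN` setting) takes the same precedence over the API key when no provider is given.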
@@ -65,6 +65,7 @@ class GeminiModel(DeepEvalBaseLLM):
         project: Optional[str] = None,
         location: Optional[str] = None,
         service_account_key: Optional[Union[str, Dict[str, str]]] = None,
+        use_vertexai: Optional[bool] = None,
         generation_kwargs: Optional[Dict] = None,
         **kwargs,
     ):
@@ -93,7 +94,11 @@ class GeminiModel(DeepEvalBaseLLM):
             location if location is not None else settings.GOOGLE_CLOUD_LOCATION
         )
         self.location = str(location).strip() if location is not None else None
-        self.use_vertexai =
+        self.use_vertexai = (
+            use_vertexai
+            if use_vertexai is not None
+            else settings.GOOGLE_GENAI_USE_VERTEXAI
+        )
 
         self.service_account_key: Optional[SecretStr] = None
         if service_account_key is None:
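The new `use_vertexai` flag overrides the `GOOGLE_GENAI_USE_VERTEXAI` setting per instance, so routing through Vertex AI no longer has to be configured globally. A sketch using only the constructor arguments visible in this diff; the `deepeval.models` import path and the project/location values are assumptions:

    from deepeval.models import GeminiModel  # assumed import path

    # Force Vertex AI routing for this instance, regardless of the
    # GOOGLE_GENAI_USE_VERTEXAI setting.
    model = GeminiModel(
        use_vertexai=True,
        project="my-gcp-project",  # placeholder project id
        location="us-central1",    # placeholder region
    )

    # Leaving use_vertexai=None (the default) falls back to the settings value.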
@@ -114,6 +114,7 @@ class Prompt:
         output_type: Optional[OutputType] = None,
         output_schema: Optional[Type[BaseModel]] = None,
         interpolation_type: Optional[PromptInterpolationType] = None,
+        confident_api_key: Optional[str] = None,
     ):
         if text_template and messages_template:
             raise TypeError(
@@ -129,6 +130,7 @@ class Prompt:
         self.interpolation_type: PromptInterpolationType = (
             interpolation_type or PromptInterpolationType.FSTRING
         )
+        self.confident_api_key = confident_api_key
 
         self._version = None
         self._prompt_version_id: Optional[str] = None
@@ -244,7 +246,7 @@ class Prompt:
             raise ValueError(
                 "Prompt alias is not set. Please set an alias to continue."
             )
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         data, _ = api.send_request(
             method=HttpMethods.GET,
             endpoint=Endpoints.PROMPTS_VERSIONS_ENDPOINT,
@@ -496,7 +498,7 @@ class Prompt:
         except Exception:
             pass
 
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         with Progress(
             SpinnerColumn(style="rgb(106,0,255)"),
             BarColumn(bar_width=60),
@@ -635,7 +637,7 @@ class Prompt:
             # Pydantic version below 2.0
             body = body.dict(by_alias=True, exclude_none=True)
 
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         _, link = api.send_request(
             method=HttpMethods.POST,
             endpoint=Endpoints.PROMPTS_ENDPOINT,
@@ -692,7 +694,7 @@ class Prompt:
             )
         except AttributeError:
             body = body.dict(by_alias=True, exclude_none=True)
-        api = Api()
+        api = Api(api_key=self.confident_api_key)
         data, _ = api.send_request(
             method=HttpMethods.PUT,
             endpoint=Endpoints.PROMPTS_VERSION_ID_ENDPOINT,
@@ -765,7 +767,7 @@ class Prompt:
         while True:
             await asyncio.sleep(self._refresh_map[CACHE_KEY][cache_value])
 
-            api = Api()
+            api = Api(api_key=self.confident_api_key)
             try:
                 if label:
                     data, _ = api.send_request(
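Because every internal `Api()` construction now receives `self.confident_api_key`, a single process can manage prompts across Confident AI workspaces without switching the global login. A sketch; the `deepeval.prompt` import path, the `alias` constructor argument, and the `pull()` method are assumptions based on the surrounding code (key values are placeholders):

    from deepeval.prompt import Prompt  # assumed import path

    # This instance authenticates with its own key on every API call,
    # including the background refresh loop.
    prompt = Prompt(
        alias="summarizer",                   # assumed constructor argument
        confident_api_key="confident-key-a",  # placeholder key
    )
    prompt.pull()  # Api(api_key=self.confident_api_key) under the hood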
@@ -386,6 +386,7 @@ class LLMTestCase(BaseModel):
                 [
                     re.search(pattern, self.input or "") is not None,
                     re.search(pattern, self.actual_output or "") is not None,
+                    re.search(pattern, self.expected_output or "") is not None,
                 ]
             )
             if isinstance(self.input, str)
@@ -1 +0,0 @@
-__version__: str = "3.8.0"
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py RENAMED (file without changes)
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt RENAMED (file without changes)
{deepeval-3.8.0 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt RENAMED (file without changes)