deepeval 3.7.3__tar.gz → 3.7.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.7.3 → deepeval-3.7.4}/PKG-INFO +1 -1
- deepeval-3.7.4/deepeval/_version.py +1 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/test.py +1 -1
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/config/settings.py +102 -13
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/configs.py +1 -1
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/execute.py +4 -1
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/answer_relevancy/template.py +4 -4
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/argument_correctness/template.py +2 -2
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/bias/template.py +3 -3
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_precision/template.py +6 -6
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_recall/template.py +2 -2
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_relevancy/template.py +3 -3
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversation_completeness/template.py +2 -2
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_dag/templates.py +4 -4
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_g_eval/template.py +4 -3
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/templates.py +4 -4
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/faithfulness/template.py +4 -4
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/hallucination/template.py +4 -4
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/misuse/template.py +2 -2
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +7 -7
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +6 -6
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +2 -2
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +3 -3
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +9 -9
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +4 -4
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/non_advice/template.py +2 -2
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pii_leakage/template.py +2 -2
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/prompt_alignment/template.py +4 -4
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_violation/template.py +2 -2
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/toxicity/template.py +4 -4
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/turn_relevancy/template.py +2 -2
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/embedding_models/azure_embedding_model.py +28 -15
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/embedding_models/local_embedding_model.py +23 -10
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/embedding_models/ollama_embedding_model.py +8 -6
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/embedding_models/openai_embedding_model.py +18 -2
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/anthropic_model.py +17 -5
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/azure_model.py +30 -18
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/deepseek_model.py +22 -12
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/gemini_model.py +120 -87
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/grok_model.py +23 -16
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/kimi_model.py +23 -12
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/litellm_model.py +63 -25
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/local_model.py +26 -18
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/ollama_model.py +17 -7
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/openai_model.py +22 -17
- deepeval-3.7.4/deepeval/models/llms/portkey_model.py +132 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/mlllms/azure_model.py +28 -19
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/mlllms/gemini_model.py +102 -73
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/mlllms/ollama_model.py +40 -9
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/mlllms/openai_model.py +65 -14
- deepeval-3.7.4/deepeval/models/utils.py +76 -0
- deepeval-3.7.4/deepeval/optimization/__init__.py +13 -0
- deepeval-3.7.4/deepeval/optimization/adapters/__init__.py +2 -0
- deepeval-3.7.4/deepeval/optimization/adapters/deepeval_scoring_adapter.py +588 -0
- deepeval-3.7.4/deepeval/optimization/aggregates.py +14 -0
- deepeval-3.7.4/deepeval/optimization/configs.py +34 -0
- deepeval-3.7.4/deepeval/optimization/copro/configs.py +31 -0
- deepeval-3.7.4/deepeval/optimization/copro/loop.py +837 -0
- deepeval-3.7.4/deepeval/optimization/gepa/__init__.py +7 -0
- deepeval-3.7.4/deepeval/optimization/gepa/configs.py +115 -0
- deepeval-3.7.4/deepeval/optimization/gepa/loop.py +677 -0
- deepeval-3.7.4/deepeval/optimization/miprov2/configs.py +134 -0
- deepeval-3.7.4/deepeval/optimization/miprov2/loop.py +785 -0
- deepeval-3.7.4/deepeval/optimization/mutations/prompt_rewriter.py +458 -0
- deepeval-3.7.4/deepeval/optimization/policies/__init__.py +16 -0
- deepeval-3.7.4/deepeval/optimization/policies/selection.py +166 -0
- deepeval-3.7.4/deepeval/optimization/policies/tie_breaker.py +67 -0
- deepeval-3.7.4/deepeval/optimization/prompt_optimizer.py +462 -0
- deepeval-3.7.4/deepeval/optimization/simba/configs.py +33 -0
- deepeval-3.7.4/deepeval/optimization/simba/loop.py +983 -0
- deepeval-3.7.4/deepeval/optimization/simba/types.py +15 -0
- deepeval-3.7.4/deepeval/optimization/types.py +361 -0
- deepeval-3.7.4/deepeval/optimization/utils.py +598 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/prompt/prompt.py +10 -5
- deepeval-3.7.4/deepeval/py.typed +0 -0
- deepeval-3.7.4/deepeval/synthesizer/chunking/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/cache.py +2 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/test_run.py +6 -1
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/utils.py +24 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/pyproject.toml +1 -1
- deepeval-3.7.3/deepeval/_version.py +0 -1
- deepeval-3.7.3/deepeval/models/utils.py +0 -31
- {deepeval-3.7.3 → deepeval-3.7.4}/LICENSE.md +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/README.md +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/annotation/annotation.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/annotation/api.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/anthropic/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/anthropic/extractors.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/anthropic/patch.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/anthropic/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/drop/drop.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/dotenv_handler.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/main.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/server.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/types.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/cli/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/confident/api.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/confident/types.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/config/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/config/logging.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/config/settings_manager.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/config/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/constants.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/contextvars.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/api.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/dataset.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/golden.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/test_run_tracer.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/types.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/errors.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/evaluate.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/evaluate/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/crewai/subs.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/crewai/tool.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/crewai/wrapper.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/langchain/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/langchain/callback.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/langchain/patch.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/langchain/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/pydantic_ai/agent.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/pydantic_ai/instrumentator.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/pydantic_ai/otel.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/key_handler.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/api.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/base_metric.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_recall/contextual_recall.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_recall/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_dag/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_dag/nodes.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/exact_match/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/exact_match/exact_match.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/faithfulness/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/g_eval/g_eval.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/g_eval/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/goal_accuracy/goal_accuracy.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/goal_accuracy/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/goal_accuracy/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/indicator.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/multimodal_answer_relevancy.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/multimodal_contextual_recall.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/multimodal_contextual_relevancy.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/multimodal_faithfulness.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_faithfulness/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/multimodal_g_eval.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_tool_correctness/multimodal_tool_correctness.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pattern_match/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pattern_match/pattern_match.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_adherence/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_adherence/plan_adherence.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_adherence/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_adherence/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_quality/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_quality/plan_quality.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_quality/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/plan_quality/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/ragas.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_adherence/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/step_efficiency/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/step_efficiency/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/step_efficiency/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/task_completion/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_correctness/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_correctness/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_correctness/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_use/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_use/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_use/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/tool_use/tool_use.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/topic_adherence/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/topic_adherence/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/topic_adherence/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/topic_adherence/topic_adherence.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/model_integrations/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/model_integrations/types.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/model_integrations/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/base_model.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/embedding_models/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/amazon_bedrock_model.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/llms/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/mlllms/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/retry_policy.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai/patch.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/agent.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/callback_handler.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/extractors.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/patch.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/openai_agents/runner.py +0 -0
- {deepeval-3.7.3/deepeval/plugins → deepeval-3.7.4/deepeval/optimization/mutations}/__init__.py +0 -0
- {deepeval-3.7.3/deepeval/synthesizer/chunking → deepeval-3.7.4/deepeval/optimization/simba}/__init__.py +0 -0
- /deepeval-3.7.3/deepeval/py.typed → /deepeval-3.7.4/deepeval/plugins/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/progress_context.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/prompt/api.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/prompt/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/scorer/scorer.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/simulator/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/singleton.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/synthesizer.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/telemetry.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/api.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/conversational_test_case.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/llm_test_case.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/mllm_test_case.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_case/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/api.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/api.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/context.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/otel/exporter.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/otel/test_exporter.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/otel/utils.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/patchers.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/trace_context.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/trace_test_manager.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/tracing.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/types.py +0 -0
- {deepeval-3.7.3 → deepeval-3.7.4}/deepeval/tracing/utils.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__: str = "3.7.4"
|
|
@@ -160,7 +160,7 @@ def run(
|
|
|
160
160
|
pytest_args.extend(["--identifier", identifier])
|
|
161
161
|
|
|
162
162
|
# Add the deepeval plugin file to pytest arguments
|
|
163
|
-
pytest_args.extend(["-p", "
|
|
163
|
+
pytest_args.extend(["-p", "deepeval"])
|
|
164
164
|
# Append the extra arguments collected by allow_extra_args=True
|
|
165
165
|
# Pytest will raise its own error if the arguments are invalid (error:
|
|
166
166
|
if ctx.args:
|
|
@@ -49,6 +49,8 @@ _DEPRECATED_TO_OVERRIDE = {
|
|
|
49
49
|
"DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS": "DEEPEVAL_PER_ATTEMPT_TIMEOUT_SECONDS_OVERRIDE",
|
|
50
50
|
"DEEPEVAL_TASK_GATHER_BUFFER_SECONDS": "DEEPEVAL_TASK_GATHER_BUFFER_SECONDS_OVERRIDE",
|
|
51
51
|
}
|
|
52
|
+
# Track which secrets we've warned about when loading from the legacy keyfile
|
|
53
|
+
_LEGACY_KEYFILE_SECRET_WARNED: set[str] = set()
|
|
52
54
|
|
|
53
55
|
|
|
54
56
|
def _find_legacy_enum(env_key: str):
|
|
@@ -88,6 +90,82 @@ def _is_secret_key(settings: "Settings", env_key: str) -> bool:
|
|
|
88
90
|
return False
|
|
89
91
|
|
|
90
92
|
|
|
93
|
+
def _merge_legacy_keyfile_into_env() -> None:
|
|
94
|
+
"""
|
|
95
|
+
Backwards compatibility: merge values from the legacy .deepeval/.deepeval
|
|
96
|
+
JSON keystore into os.environ for known Settings fields, without
|
|
97
|
+
overwriting existing process env vars.
|
|
98
|
+
|
|
99
|
+
This runs before we compute the Settings env fingerprint so that Pydantic
|
|
100
|
+
can see these values on first construction.
|
|
101
|
+
|
|
102
|
+
Precedence: process env -> dotenv -> legacy json
|
|
103
|
+
"""
|
|
104
|
+
# if somebody really wants to skip this behavior
|
|
105
|
+
if parse_bool(os.getenv("DEEPEVAL_DISABLE_LEGACY_KEYFILE"), default=False):
|
|
106
|
+
return
|
|
107
|
+
|
|
108
|
+
from deepeval.constants import HIDDEN_DIR, KEY_FILE
|
|
109
|
+
from deepeval.key_handler import (
|
|
110
|
+
KeyValues,
|
|
111
|
+
ModelKeyValues,
|
|
112
|
+
EmbeddingKeyValues,
|
|
113
|
+
SECRET_KEYS,
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
key_path = Path(HIDDEN_DIR) / KEY_FILE
|
|
117
|
+
|
|
118
|
+
try:
|
|
119
|
+
with key_path.open("r", encoding="utf-8") as f:
|
|
120
|
+
try:
|
|
121
|
+
data = json.load(f)
|
|
122
|
+
except json.JSONDecodeError:
|
|
123
|
+
# Corrupted file -> ignore, same as KeyFileHandler
|
|
124
|
+
return
|
|
125
|
+
except FileNotFoundError:
|
|
126
|
+
# No legacy store -> nothing to merge
|
|
127
|
+
return
|
|
128
|
+
|
|
129
|
+
if not isinstance(data, dict):
|
|
130
|
+
return
|
|
131
|
+
|
|
132
|
+
# Map JSON keys (enum .value) -> env keys (enum .name)
|
|
133
|
+
mapping: Dict[str, str] = {}
|
|
134
|
+
for enum in (KeyValues, ModelKeyValues, EmbeddingKeyValues):
|
|
135
|
+
for member in enum:
|
|
136
|
+
mapping[member.value] = member.name
|
|
137
|
+
|
|
138
|
+
for json_key, raw in data.items():
|
|
139
|
+
env_key = mapping.get(json_key)
|
|
140
|
+
if not env_key:
|
|
141
|
+
continue
|
|
142
|
+
|
|
143
|
+
# Process env always wins
|
|
144
|
+
if env_key in os.environ:
|
|
145
|
+
continue
|
|
146
|
+
if raw is None:
|
|
147
|
+
continue
|
|
148
|
+
|
|
149
|
+
# Mirror the legacy warning semantics for secrets, but only once per key
|
|
150
|
+
if (
|
|
151
|
+
json_key in SECRET_KEYS
|
|
152
|
+
and json_key not in _LEGACY_KEYFILE_SECRET_WARNED
|
|
153
|
+
):
|
|
154
|
+
logger.warning(
|
|
155
|
+
"Reading secret '%s' from legacy %s/%s. "
|
|
156
|
+
"Persisting API keys in plaintext is deprecated. "
|
|
157
|
+
"Move this to your environment (.env / .env.local). "
|
|
158
|
+
"This fallback will be removed in a future release.",
|
|
159
|
+
json_key,
|
|
160
|
+
HIDDEN_DIR,
|
|
161
|
+
KEY_FILE,
|
|
162
|
+
)
|
|
163
|
+
_LEGACY_KEYFILE_SECRET_WARNED.add(json_key)
|
|
164
|
+
|
|
165
|
+
# Let Settings validators coerce types; we just inject the raw string
|
|
166
|
+
os.environ[env_key] = str(raw)
|
|
167
|
+
|
|
168
|
+
|
|
91
169
|
def _read_env_file(path: Path) -> Dict[str, str]:
|
|
92
170
|
if not path.exists():
|
|
93
171
|
return {}
|
|
@@ -258,6 +336,7 @@ class Settings(BaseSettings):
|
|
|
258
336
|
GOOGLE_GENAI_USE_VERTEXAI: Optional[bool] = None
|
|
259
337
|
GOOGLE_CLOUD_PROJECT: Optional[str] = None
|
|
260
338
|
GOOGLE_CLOUD_LOCATION: Optional[str] = None
|
|
339
|
+
GOOGLE_SERVICE_ACCOUNT_KEY: Optional[str] = None
|
|
261
340
|
# Grok
|
|
262
341
|
USE_GROK_MODEL: Optional[bool] = None
|
|
263
342
|
GROK_API_KEY: Optional[SecretStr] = None
|
|
@@ -291,6 +370,12 @@ class Settings(BaseSettings):
|
|
|
291
370
|
OPENAI_MODEL_NAME: Optional[str] = None
|
|
292
371
|
OPENAI_COST_PER_INPUT_TOKEN: Optional[float] = None
|
|
293
372
|
OPENAI_COST_PER_OUTPUT_TOKEN: Optional[float] = None
|
|
373
|
+
# PortKey
|
|
374
|
+
USE_PORTKEY_MODEL: Optional[bool] = None
|
|
375
|
+
PORTKEY_API_KEY: Optional[SecretStr] = None
|
|
376
|
+
PORTKEY_MODEL_NAME: Optional[str] = None
|
|
377
|
+
PORTKEY_BASE_URL: Optional[AnyUrl] = None
|
|
378
|
+
PORTKEY_PROVIDER_NAME: Optional[str] = None
|
|
294
379
|
# Vertex AI
|
|
295
380
|
VERTEX_AI_MODEL_NAME: Optional[str] = None
|
|
296
381
|
# VLLM
|
|
@@ -516,29 +601,30 @@ class Settings(BaseSettings):
|
|
|
516
601
|
"CONFIDENT_OPEN_BROWSER",
|
|
517
602
|
"CONFIDENT_TRACE_FLUSH",
|
|
518
603
|
"CONFIDENT_TRACE_VERBOSE",
|
|
604
|
+
"CUDA_LAUNCH_BLOCKING",
|
|
605
|
+
"DEEPEVAL_VERBOSE_MODE",
|
|
606
|
+
"DEEPEVAL_GRPC_LOGGING",
|
|
607
|
+
"DEEPEVAL_DISABLE_DOTENV",
|
|
608
|
+
"DEEPEVAL_TELEMETRY_OPT_OUT",
|
|
609
|
+
"DEEPEVAL_UPDATE_WARNING_OPT_IN",
|
|
610
|
+
"ENABLE_DEEPEVAL_CACHE",
|
|
611
|
+
"ERROR_REPORTING",
|
|
612
|
+
"GOOGLE_GENAI_USE_VERTEXAI",
|
|
613
|
+
"IGNORE_DEEPEVAL_ERRORS",
|
|
614
|
+
"SKIP_DEEPEVAL_MISSING_PARAMS",
|
|
615
|
+
"TOKENIZERS_PARALLELISM",
|
|
616
|
+
"TRANSFORMERS_NO_ADVISORY_WARNINGS",
|
|
519
617
|
"USE_OPENAI_MODEL",
|
|
520
618
|
"USE_AZURE_OPENAI",
|
|
521
619
|
"USE_LOCAL_MODEL",
|
|
522
620
|
"USE_GEMINI_MODEL",
|
|
523
|
-
"GOOGLE_GENAI_USE_VERTEXAI",
|
|
524
621
|
"USE_MOONSHOT_MODEL",
|
|
525
622
|
"USE_GROK_MODEL",
|
|
526
623
|
"USE_DEEPSEEK_MODEL",
|
|
527
624
|
"USE_LITELLM",
|
|
528
625
|
"USE_AZURE_OPENAI_EMBEDDING",
|
|
529
626
|
"USE_LOCAL_EMBEDDINGS",
|
|
530
|
-
"
|
|
531
|
-
"DEEPEVAL_DISABLE_DOTENV",
|
|
532
|
-
"DEEPEVAL_TELEMETRY_OPT_OUT",
|
|
533
|
-
"DEEPEVAL_UPDATE_WARNING_OPT_IN",
|
|
534
|
-
"TOKENIZERS_PARALLELISM",
|
|
535
|
-
"TRANSFORMERS_NO_ADVISORY_WARNINGS",
|
|
536
|
-
"CUDA_LAUNCH_BLOCKING",
|
|
537
|
-
"ERROR_REPORTING",
|
|
538
|
-
"IGNORE_DEEPEVAL_ERRORS",
|
|
539
|
-
"SKIP_DEEPEVAL_MISSING_PARAMS",
|
|
540
|
-
"DEEPEVAL_VERBOSE_MODE",
|
|
541
|
-
"ENABLE_DEEPEVAL_CACHE",
|
|
627
|
+
"USE_PORTKEY_MODEL",
|
|
542
628
|
mode="before",
|
|
543
629
|
)
|
|
544
630
|
@classmethod
|
|
@@ -1008,6 +1094,9 @@ _settings_lock = threading.RLock()
|
|
|
1008
1094
|
|
|
1009
1095
|
|
|
1010
1096
|
def _calc_env_fingerprint() -> str:
|
|
1097
|
+
# Pull legacy .deepeval JSON-based settings into the process env before hashing
|
|
1098
|
+
_merge_legacy_keyfile_into_env()
|
|
1099
|
+
|
|
1011
1100
|
env = os.environ.copy()
|
|
1012
1101
|
# must hash in a stable order.
|
|
1013
1102
|
keys = sorted(
|
|
@@ -718,6 +718,8 @@ async def a_execute_test_cases(
|
|
|
718
718
|
"Gather timed out after %.1fs. Some metrics may be marked as timed out.",
|
|
719
719
|
_gather_timeout(),
|
|
720
720
|
)
|
|
721
|
+
if not error_config.ignore_errors:
|
|
722
|
+
raise
|
|
721
723
|
|
|
722
724
|
else:
|
|
723
725
|
for test_case in test_cases:
|
|
@@ -803,7 +805,8 @@ async def a_execute_test_cases(
|
|
|
803
805
|
if not t.done():
|
|
804
806
|
t.cancel()
|
|
805
807
|
await asyncio.gather(*tasks, return_exceptions=True)
|
|
806
|
-
|
|
808
|
+
if not error_config.ignore_errors:
|
|
809
|
+
raise
|
|
807
810
|
|
|
808
811
|
return test_results
|
|
809
812
|
|
|
@@ -49,12 +49,12 @@ Expected JSON format:
|
|
|
49
49
|
"verdict": "yes"
|
|
50
50
|
}},
|
|
51
51
|
{{
|
|
52
|
-
"
|
|
53
|
-
"
|
|
52
|
+
"reason": <explanation_for_irrelevance>,
|
|
53
|
+
"verdict": "no"
|
|
54
54
|
}},
|
|
55
55
|
{{
|
|
56
|
-
"
|
|
57
|
-
"
|
|
56
|
+
"reason": <explanation_for_ambiguity>,
|
|
57
|
+
"verdict": "idk"
|
|
58
58
|
}}
|
|
59
59
|
]
|
|
60
60
|
}}
|
|
@@ -70,8 +70,8 @@ class ArgumentCorrectnessTemplate:
|
|
|
70
70
|
"verdict": "yes"
|
|
71
71
|
}},
|
|
72
72
|
{{
|
|
73
|
-
"
|
|
74
|
-
"
|
|
73
|
+
"reason": "Recommending romantic Parisian comedies does not help find the highest temperature in 2023.",
|
|
74
|
+
"verdict": "no"
|
|
75
75
|
}}
|
|
76
76
|
]
|
|
77
77
|
}}
|
|
@@ -64,15 +64,15 @@ Example JSON:
|
|
|
64
64
|
{{
|
|
65
65
|
"verdicts": [
|
|
66
66
|
{{
|
|
67
|
-
"
|
|
68
|
-
"
|
|
67
|
+
"reason": "The opinion "Government meddling in healthcare bloats costs and quashes innovation" reveals a political bias, emphasizing negative views on government involvement.",
|
|
68
|
+
"verdict": "yes"
|
|
69
69
|
}},
|
|
70
70
|
{{
|
|
71
71
|
"verdict": "no"
|
|
72
72
|
}},
|
|
73
73
|
{{
|
|
74
74
|
"verdict": "no"
|
|
75
|
-
}}
|
|
75
|
+
}}
|
|
76
76
|
]
|
|
77
77
|
}}
|
|
78
78
|
|
|
@@ -19,16 +19,16 @@ Example:
|
|
|
19
19
|
{{
|
|
20
20
|
"verdicts": [
|
|
21
21
|
{{
|
|
22
|
-
"
|
|
23
|
-
"
|
|
22
|
+
"reason": "It clearly addresses the question by stating that 'Einstein won the Nobel Prize for his discovery of the photoelectric effect.'",
|
|
23
|
+
"verdict": "yes"
|
|
24
24
|
}},
|
|
25
25
|
{{
|
|
26
|
-
"
|
|
27
|
-
"
|
|
26
|
+
"reason": "The text verifies that the prize was indeed won in 1968.",
|
|
27
|
+
"verdict": "yes"
|
|
28
28
|
}},
|
|
29
29
|
{{
|
|
30
|
-
"
|
|
31
|
-
"
|
|
30
|
+
"reason": "'There was a cat' is not at all relevant to the topic of winning a Nobel Prize.",
|
|
31
|
+
"verdict": "no"
|
|
32
32
|
}}
|
|
33
33
|
]
|
|
34
34
|
}}
|
|
@@ -55,13 +55,13 @@ Example:
|
|
|
55
55
|
{{
|
|
56
56
|
"verdicts": [
|
|
57
57
|
{{
|
|
58
|
-
"verdict": "yes",
|
|
59
58
|
"statement": "Einstein won the Nobel Prize for his discovery of the photoelectric effect in 1968",
|
|
59
|
+
"verdict": "yes"
|
|
60
60
|
}},
|
|
61
61
|
{{
|
|
62
|
-
"verdict": "no",
|
|
63
62
|
"statement": "There was a cat.",
|
|
64
|
-
"reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements."
|
|
63
|
+
"reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements.",
|
|
64
|
+
"verdict": "no"
|
|
65
65
|
}}
|
|
66
66
|
]
|
|
67
67
|
}}
|
|
@@ -86,8 +86,8 @@ User wants to tell the assistant something.
|
|
|
86
86
|
|
|
87
87
|
Example JSON:
|
|
88
88
|
{{
|
|
89
|
-
"
|
|
90
|
-
"
|
|
89
|
+
"reason": "The user wanted to tell the assistant something but the LLM not only refused to answer but replied 'Oh ok, in that case should you need anything just let me know!', which is completely irrelevant and doesn't satisfy the user at all.",
|
|
90
|
+
"verdict": "no"
|
|
91
91
|
}}
|
|
92
92
|
===== END OF EXAMPLE ======
|
|
93
93
|
|
|
@@ -77,8 +77,8 @@ class ConversationalBinaryJudgementTemplate:
|
|
|
77
77
|
|
|
78
78
|
Example:
|
|
79
79
|
{{
|
|
80
|
-
"
|
|
81
|
-
"
|
|
80
|
+
"reason": "The assistant provided a clear and direct answer in response to every user query.",
|
|
81
|
+
"verdict": true
|
|
82
82
|
}}
|
|
83
83
|
**
|
|
84
84
|
JSON:
|
|
@@ -108,8 +108,8 @@ class ConversationalNonBinaryJudgementTemplate:
|
|
|
108
108
|
|
|
109
109
|
Example:
|
|
110
110
|
{{
|
|
111
|
-
"
|
|
112
|
-
"
|
|
111
|
+
"reason": "The assistant partially addressed the user's issue but missed clarifying their follow-up question.",
|
|
112
|
+
"verdict": "{options[1]}"
|
|
113
113
|
}}
|
|
114
114
|
**
|
|
115
115
|
JSON:
|
|
@@ -63,8 +63,8 @@ class BinaryJudgementTemplate:
|
|
|
63
63
|
IMPORTANT: Please make sure to only return a json with two keys: `verdict` (True or False), and the 'reason' key providing the reason. The verdict must be a boolean only, either True or False.
|
|
64
64
|
Example JSON:
|
|
65
65
|
{{
|
|
66
|
-
"
|
|
67
|
-
"
|
|
66
|
+
"reason": "...",
|
|
67
|
+
"verdict": True
|
|
68
68
|
}}
|
|
69
69
|
**
|
|
70
70
|
|
|
@@ -85,8 +85,8 @@ class NonBinaryJudgementTemplate:
|
|
|
85
85
|
IMPORTANT: Please make sure to only return a json with two keys: 'verdict' {options} and 'reason' providing the reason.
|
|
86
86
|
Example JSON:
|
|
87
87
|
{{
|
|
88
|
-
"
|
|
89
|
-
"
|
|
88
|
+
"reason": "...",
|
|
89
|
+
"verdict": {options}
|
|
90
90
|
}}
|
|
91
91
|
**
|
|
92
92
|
|
|
@@ -83,12 +83,12 @@ Expected JSON format:
|
|
|
83
83
|
"verdict": "yes"
|
|
84
84
|
}},
|
|
85
85
|
{{
|
|
86
|
-
"
|
|
87
|
-
"
|
|
86
|
+
"reason": <explanation_for_contradiction>,
|
|
87
|
+
"verdict": "no"
|
|
88
88
|
}},
|
|
89
89
|
{{
|
|
90
|
-
"
|
|
91
|
-
"
|
|
90
|
+
"reason": <explanation_for_uncertainty>,
|
|
91
|
+
"verdict": "idk"
|
|
92
92
|
}}
|
|
93
93
|
]
|
|
94
94
|
}}
|
|
@@ -17,12 +17,12 @@ Example:
|
|
|
17
17
|
{{
|
|
18
18
|
"verdicts": [
|
|
19
19
|
{{
|
|
20
|
-
"
|
|
21
|
-
"
|
|
20
|
+
"reason": "The actual output agrees with the provided context which states that Einstein won the Nobel Prize for his discovery of the photoelectric effect.",
|
|
21
|
+
"verdict": "yes"
|
|
22
22
|
}},
|
|
23
23
|
{{
|
|
24
|
-
"
|
|
25
|
-
"
|
|
24
|
+
"reason": "The actual output contradicts the provided context which states that Einstein won the Nobel Prize in 1968, not 1969.",
|
|
25
|
+
"verdict": "no"
|
|
26
26
|
}}
|
|
27
27
|
]
|
|
28
28
|
}}
|
|
@@ -40,8 +40,8 @@ Example JSON:
|
|
|
40
40
|
{{
|
|
41
41
|
"verdicts": [
|
|
42
42
|
{{
|
|
43
|
-
"
|
|
44
|
-
"
|
|
43
|
+
"reason": "This request falls outside the {domain} domain and should be handled by a different specialist.",
|
|
44
|
+
"verdict": "yes"
|
|
45
45
|
}},
|
|
46
46
|
{{
|
|
47
47
|
"verdict": "no"
|
|
@@ -50,19 +50,19 @@ class MultimodalAnswerRelevancyTemplate:
|
|
|
50
50
|
{{
|
|
51
51
|
"verdicts": [
|
|
52
52
|
{{
|
|
53
|
-
"
|
|
54
|
-
"
|
|
53
|
+
"reason": "The 'Shoes.' statement made in the actual output is completely irrelevant to the input, which asks about what to do in the event of an earthquake.",
|
|
54
|
+
"verdict": "no"
|
|
55
55
|
}},
|
|
56
56
|
{{
|
|
57
|
-
"
|
|
58
|
-
"
|
|
57
|
+
"reason": "The statement thanking the user for asking the question is not directly relevant to the input, but is not entirely irrelevant.",
|
|
58
|
+
"verdict": "idk"
|
|
59
59
|
}},
|
|
60
60
|
{{
|
|
61
|
-
"
|
|
62
|
-
"
|
|
61
|
+
"reason": "The question about whether there is anything else the user can help with is not directly relevant to the input, but is not entirely irrelevant.",
|
|
62
|
+
"verdict": "idk"
|
|
63
63
|
}},
|
|
64
64
|
{{
|
|
65
|
-
"verdict": "yes"
|
|
65
|
+
"verdict": "yes"
|
|
66
66
|
}}
|
|
67
67
|
]
|
|
68
68
|
}}
|
|
@@ -27,16 +27,16 @@ class MultiModalContextualPrecisionTemplate:
|
|
|
27
27
|
{{
|
|
28
28
|
"verdicts": [
|
|
29
29
|
{{
|
|
30
|
-
"
|
|
31
|
-
"
|
|
30
|
+
"reason": "It clearly addresses the question by stating that 'Einstein won the Nobel Prize for his discovery of the photoelectric effect.'",
|
|
31
|
+
"verdict": "yes"
|
|
32
32
|
}},
|
|
33
33
|
{{
|
|
34
|
-
"
|
|
35
|
-
"
|
|
34
|
+
"reason": "The text verifies that the prize was indeed won in 1968.",
|
|
35
|
+
"verdict": "yes"
|
|
36
36
|
}},
|
|
37
37
|
{{
|
|
38
|
-
"
|
|
39
|
-
"
|
|
38
|
+
"reason": "'There was a cat' is not at all relevant to the topic of winning a Nobel Prize.",
|
|
39
|
+
"verdict": "no"
|
|
40
40
|
}}
|
|
41
41
|
]
|
|
42
42
|
}}
|
|
@@ -74,13 +74,13 @@ class MultimodalContextualRelevancyTemplate:
|
|
|
74
74
|
{{
|
|
75
75
|
"verdicts": [
|
|
76
76
|
{{
|
|
77
|
-
"verdict": "yes",
|
|
78
77
|
"statement": "Einstein won the Nobel Prize for his discovery of the photoelectric effect in 1968",
|
|
78
|
+
"verdict": "yes"
|
|
79
79
|
}},
|
|
80
80
|
{{
|
|
81
|
-
"verdict": "no",
|
|
82
81
|
"statement": "There was a cat.",
|
|
83
|
-
"reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements."
|
|
82
|
+
"reason": "The retrieval context contained the information 'There was a cat' when it has nothing to do with Einstein's achievements.",
|
|
83
|
+
"verdict": "no"
|
|
84
84
|
}}
|
|
85
85
|
]
|
|
86
86
|
}}
|
|
@@ -107,24 +107,24 @@ class MultimodalFaithfulnessTemplate:
|
|
|
107
107
|
{{
|
|
108
108
|
"verdicts": [
|
|
109
109
|
{{
|
|
110
|
-
"
|
|
111
|
-
"
|
|
110
|
+
"reason": "The claim about Barack Obama is not directly addressed in the retrieval context, and so poses no contradiction.",
|
|
111
|
+
"verdict": "idk"
|
|
112
112
|
}},
|
|
113
113
|
{{
|
|
114
|
-
"
|
|
115
|
-
"
|
|
114
|
+
"reason": "The claim about Zurich being a city in London is incorrect but does not pose a contradiction to the retrieval context.",
|
|
115
|
+
"verdict": "idk"
|
|
116
116
|
}},
|
|
117
117
|
{{
|
|
118
118
|
"verdict": "yes"
|
|
119
119
|
}},
|
|
120
120
|
{{
|
|
121
|
-
"
|
|
122
|
-
"
|
|
121
|
+
"reason": "The actual output claims Einstein won the Nobel Prize in 1969, which is untrue as the retrieval context states it is 1968 instead.",
|
|
122
|
+
"verdict": "no"
|
|
123
123
|
}},
|
|
124
124
|
{{
|
|
125
|
-
"
|
|
126
|
-
"
|
|
127
|
-
}}
|
|
125
|
+
"reason": "The actual output claims Einstein is a German chef, which is not correct as the retrieval context states he was a German scientist instead.",
|
|
126
|
+
"verdict": "no"
|
|
127
|
+
}}
|
|
128
128
|
]
|
|
129
129
|
}}
|
|
130
130
|
===== END OF EXAMPLE ======
|
{deepeval-3.7.3 → deepeval-3.7.4}/deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py
RENAMED
|
@@ -92,8 +92,8 @@ class MultimodalGEvalTemplate:
|
|
|
92
92
|
---
|
|
93
93
|
**Example JSON:**
|
|
94
94
|
{{
|
|
95
|
-
"
|
|
96
|
-
"
|
|
95
|
+
"reason": "your concise and informative reason here",
|
|
96
|
+
"score": {score_range[0]}
|
|
97
97
|
}}
|
|
98
98
|
|
|
99
99
|
JSON:
|
|
@@ -136,8 +136,8 @@ class MultimodalGEvalTemplate:
|
|
|
136
136
|
|
|
137
137
|
Example JSON:
|
|
138
138
|
{{
|
|
139
|
-
"
|
|
140
|
-
"
|
|
139
|
+
"reason": "The text does not follow the evaluation steps provided.",
|
|
140
|
+
"score": 0
|
|
141
141
|
}}
|
|
142
142
|
**
|
|
143
143
|
|
|
@@ -26,12 +26,12 @@ Example JSON:
|
|
|
26
26
|
"verdict": "yes"
|
|
27
27
|
}},
|
|
28
28
|
{{
|
|
29
|
-
"
|
|
30
|
-
"
|
|
29
|
+
"reason": "The LLM corrected the user when the user used the wrong grammar in asking about the number of stars in the sky.",
|
|
30
|
+
"verdict": "no"
|
|
31
31
|
}},
|
|
32
32
|
{{
|
|
33
|
-
"
|
|
34
|
-
"
|
|
33
|
+
"reason": "The LLM only made 'HEY THERE' uppercase, which does not follow the instruction of making everything uppercase completely.",
|
|
34
|
+
"verdict": "no"
|
|
35
35
|
}}
|
|
36
36
|
]
|
|
37
37
|
}}
|