deepeval 3.7.9__tar.gz → 3.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepeval-3.7.9 → deepeval-3.8.1}/PKG-INFO +3 -3
- {deepeval-3.7.9 → deepeval-3.8.1}/README.md +2 -2
- deepeval-3.8.1/deepeval/_version.py +1 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/annotation/annotation.py +2 -2
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/cli/main.py +168 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/confident/api.py +2 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/config/settings.py +13 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/constants.py +1 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/dataset/dataset.py +6 -4
- deepeval-3.8.1/deepeval/integrations/langchain/callback.py +542 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/langchain/utils.py +31 -8
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/key_handler.py +8 -1
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/contextual_recall.py +25 -6
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/schema.py +6 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/conversational_g_eval.py +35 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/g_eval/g_eval.py +35 -1
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/g_eval/utils.py +65 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +10 -1
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +10 -1
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +10 -1
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/utils.py +1 -1
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/__init__.py +2 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/__init__.py +2 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/amazon_bedrock_model.py +51 -6
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/azure_model.py +33 -7
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/constants.py +23 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/gemini_model.py +6 -1
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/openai_model.py +5 -4
- deepeval-3.8.1/deepeval/models/llms/openrouter_model.py +398 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/retry_policy.py +3 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/prompt/api.py +1 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/prompt/prompt.py +7 -5
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_case/llm_test_case.py +1 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/tracing.py +6 -1
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/types.py +1 -1
- {deepeval-3.7.9 → deepeval-3.8.1}/pyproject.toml +1 -1
- deepeval-3.7.9/deepeval/_version.py +0 -1
- deepeval-3.7.9/deepeval/integrations/langchain/callback.py +0 -370
- {deepeval-3.7.9 → deepeval-3.8.1}/LICENSE.md +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/annotation/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/annotation/api.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/anthropic/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/anthropic/extractors.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/anthropic/patch.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/anthropic/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/arc/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/arc/arc.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/arc/mode.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/arc/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/base_benchmark.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/bbq/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/bbq/bbq.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/bbq/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/bbq/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/big_bench_hard.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/date_understanding.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/navigate.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/object_counting.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/ruin_names.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/snarks.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/cot_prompts/word_sorting.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/boolean_expressions.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/causal_judgement.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/date_understanding.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/disambiguation_qa.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/dyck_languages.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/formal_fallacies.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/geometric_shapes.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/hyperbaton.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_five_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_seven_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/logical_deduction_three_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/movie_recommendation.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/multistep_arithmetic_two.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/navigate.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/object_counting.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/penguins_in_a_table.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/reasoning_about_colored_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/ruin_names.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/salient_translation_error_detection.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/snarks.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/sports_understanding.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/temporal_sequences.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/web_of_lies.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/shot_prompts/word_sorting.txt +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/big_bench_hard/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/bool_q.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/bool_q/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/drop/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/drop/drop.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/drop/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/drop/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/equity_med_qa.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/equity_med_qa/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/gsm8k.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/gsm8k/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/hellaswag.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/hellaswag/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/human_eval.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/human_eval/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/ifeval.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/ifeval/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/lambada/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/lambada/lambada.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/lambada/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/logi_qa.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/logi_qa/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/math_qa.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/math_qa/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/mmlu.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/mmlu/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/modes/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/results.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/squad/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/squad/squad.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/squad/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/squad/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/tasks/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/mode.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/task.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/benchmarks/winogrande/winogrande.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/cli/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/cli/dotenv_handler.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/cli/server.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/cli/test.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/cli/types.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/cli/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/confident/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/confident/types.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/config/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/config/dotenv_handler.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/config/logging.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/config/settings_manager.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/config/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/contextvars.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/dataset/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/dataset/api.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/dataset/golden.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/dataset/test_run_tracer.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/dataset/types.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/dataset/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/errors.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/evaluate/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/evaluate/api.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/evaluate/compare.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/evaluate/configs.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/evaluate/evaluate.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/evaluate/execute.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/evaluate/types.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/evaluate/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/crewai/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/crewai/handler.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/crewai/subs.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/crewai/tool.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/crewai/wrapper.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/hugging_face/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/hugging_face/callback.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/hugging_face/rich_manager.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/hugging_face/tests/test_callbacks.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/hugging_face/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/langchain/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/langchain/patch.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/llama_index/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/llama_index/handler.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/llama_index/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/agent.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/instrumentator.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/otel.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/integrations/pydantic_ai/test_instrumentator.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/answer_relevancy.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/answer_relevancy/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/api.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/arena_g_eval.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/arena_g_eval/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/argument_correctness.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/argument_correctness/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/base_metric.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/bias/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/bias/bias.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/bias/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/bias/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/contextual_precision.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_precision/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_recall/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/contextual_relevancy.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/contextual_relevancy/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/conversation_completeness.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversation_completeness/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/conversational_dag.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/nodes.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversational_dag/templates.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/conversational_g_eval/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/dag/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/dag/dag.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/dag/graph.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/dag/nodes.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/dag/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/dag/templates.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/dag/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/exact_match/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/exact_match/exact_match.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/faithfulness/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/faithfulness/faithfulness.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/faithfulness/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/faithfulness/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/g_eval/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/g_eval/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/g_eval/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/goal_accuracy.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/goal_accuracy/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/hallucination/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/hallucination/hallucination.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/hallucination/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/hallucination/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/indicator.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/json_correctness/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/json_correctness/json_correctness.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/json_correctness/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/json_correctness/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/knowledge_retention.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/knowledge_retention/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/mcp/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/mcp/mcp_task_completion.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/mcp/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/mcp/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/mcp_use_metric.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/mcp_use_metric/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/misuse/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/misuse/misuse.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/misuse/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/misuse/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_coherence/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/image_editing.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_editing/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_helpfulness/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/image_reference/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/multimodal_metrics/text_to_image/text_to_image.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/non_advice/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/non_advice/non_advice.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/non_advice/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/non_advice/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/pattern_match/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/pattern_match/pattern_match.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/pii_leakage.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/pii_leakage/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/plan_adherence.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/plan_adherence/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/plan_quality/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/plan_quality/plan_quality.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/plan_quality/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/plan_quality/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/prompt_alignment.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/prompt_alignment/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/ragas.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/role_adherence/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/role_adherence/role_adherence.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/role_adherence/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/role_adherence/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/role_violation/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/role_violation/role_violation.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/role_violation/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/role_violation/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/step_efficiency.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/step_efficiency/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/summarization/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/summarization/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/summarization/summarization.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/summarization/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/task_completion/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/task_completion/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/task_completion/task_completion.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/task_completion/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/tool_correctness/tool_correctness.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/tool_use/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/tool_use/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/tool_use/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/tool_use/tool_use.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/topic_adherence/topic_adherence.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/toxicity/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/toxicity/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/toxicity/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/toxicity/toxicity.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_precision/turn_contextual_precision.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_recall/turn_contextual_recall.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_contextual_relevancy/turn_contextual_relevancy.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_faithfulness/turn_faithfulness.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/metrics/turn_relevancy/turn_relevancy.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/model_integrations/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/model_integrations/types.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/model_integrations/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/_summac_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/answer_relevancy_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/base_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/detoxify_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/embedding_models/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/embedding_models/azure_embedding_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/embedding_models/local_embedding_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/embedding_models/ollama_embedding_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/embedding_models/openai_embedding_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/hallucination_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/anthropic_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/deepseek_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/grok_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/kimi_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/litellm_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/local_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/ollama_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/portkey_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/llms/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/summac_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/unbias_model.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/models/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/openai/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/openai/extractors.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/openai/patch.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/openai/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/openai_agents/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/openai_agents/agent.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/openai_agents/callback_handler.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/openai_agents/extractors.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/openai_agents/patch.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/openai_agents/runner.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/base.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/configs.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/copro/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/copro/copro.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/gepa/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/gepa/gepa.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/bootstrapper.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/miprov2.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/miprov2/proposer.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/simba.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/algorithms/simba/types.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/configs.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/policies.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/prompt_optimizer.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/rewriter/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/rewriter/rewriter.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/rewriter/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/scorer/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/scorer/base.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/scorer/scorer.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/scorer/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/types.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/optimizer/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/plugins/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/plugins/plugin.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/progress_context.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/prompt/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/prompt/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/py.typed +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/red_teaming/README.md +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/scorer/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/scorer/scorer.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/simulator/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/simulator/conversation_simulator.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/simulator/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/simulator/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/singleton.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/base_synthesizer.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/chunking/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/chunking/context_generator.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/chunking/doc_chunker.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/config.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/schema.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/synthesizer.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/templates/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/templates/template.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/templates/template_extraction.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/templates/template_prompt.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/types.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/synthesizer/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/telemetry.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_case/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_case/api.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_case/arena_test_case.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_case/conversational_test_case.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_case/mcp.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_case/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_run/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_run/api.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_run/cache.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_run/hooks.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_run/hyperparameters.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/test_run/test_run.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/api.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/context.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/offline_evals/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/offline_evals/api.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/offline_evals/span.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/offline_evals/thread.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/offline_evals/trace.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/otel/__init__.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/otel/exporter.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/otel/test_exporter.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/otel/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/patchers.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/perf_epoch_bridge.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/trace_context.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/trace_test_manager.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/tracing/utils.py +0 -0
- {deepeval-3.7.9 → deepeval-3.8.1}/deepeval/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: deepeval
|
|
3
|
-
Version: 3.7.9
|
|
3
|
+
Version: 3.8.1
|
|
4
4
|
Summary: The LLM Evaluation Framework
|
|
5
5
|
Home-page: https://github.com/confident-ai/deepeval
|
|
6
6
|
License: Apache-2.0
|
|
@@ -100,7 +100,7 @@ Description-Content-Type: text/markdown
|
|
|
100
100
|
<a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=zh">中文</a>
|
|
101
101
|
</p>
|
|
102
102
|
|
|
103
|
-
**DeepEval** is a simple-to-use, open-source LLM evaluation framework, for evaluating and testing large-language model systems. It is similar to Pytest but specialized for unit testing LLM outputs. DeepEval incorporates the latest research to evaluate LLM outputs based on metrics such as G-Eval, task completion, answer relevancy, hallucination, etc., which uses LLM-as-a-judge and other NLP models that runs **locally on your machine** for evaluation.
|
|
103
|
+
**DeepEval** is a simple-to-use, open-source LLM evaluation framework, for evaluating and testing large-language model systems. It is similar to Pytest but specialized for unit testing LLM outputs. DeepEval incorporates the latest research to evaluate LLM outputs based on metrics such as G-Eval, task completion, answer relevancy, hallucination, etc., which uses LLM-as-a-judge and other NLP models that run **locally on your machine** for evaluation.
|
|
104
104
|
|
|
105
105
|
Whether your LLM applications are AI agents, RAG pipelines, or chatbots, implemented via LangChain or OpenAI, DeepEval has you covered. With it, you can easily determine the optimal models, prompts, and architecture to improve your RAG pipeline, agentic workflows, prevent prompt drifting, or even transition from OpenAI to hosting your own Deepseek R1 with confidence.
|
|
106
106
|
|
|
@@ -118,7 +118,7 @@ Whether your LLM applications are AI agents, RAG pipelines, or chatbots, impleme
|
|
|
118
118
|
> 🥳 You can now share DeepEval's test results on the cloud directly on [Confident AI](https://confident-ai.com?utm_source=GitHub)
|
|
119
119
|
|
|
120
120
|
- Supports both end-to-end and component-level LLM evaluation.
|
|
121
|
-
- Large variety of ready-to-use LLM evaluation metrics (all with explanations) powered by **ANY** LLM of your choice, statistical methods, or NLP models that runs **locally on your machine**:
|
|
121
|
+
- Large variety of ready-to-use LLM evaluation metrics (all with explanations) powered by **ANY** LLM of your choice, statistical methods, or NLP models that run **locally on your machine**:
|
|
122
122
|
- G-Eval
|
|
123
123
|
- DAG ([deep acyclic graph](https://deepeval.com/docs/metrics-dag))
|
|
124
124
|
- **RAG metrics:**
|
|
@@ -53,7 +53,7 @@
|
|
|
53
53
|
<a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=zh">中文</a>
|
|
54
54
|
</p>
|
|
55
55
|
|
|
56
|
-
**DeepEval** is a simple-to-use, open-source LLM evaluation framework, for evaluating and testing large-language model systems. It is similar to Pytest but specialized for unit testing LLM outputs. DeepEval incorporates the latest research to evaluate LLM outputs based on metrics such as G-Eval, task completion, answer relevancy, hallucination, etc., which uses LLM-as-a-judge and other NLP models that runs **locally on your machine** for evaluation.
|
|
56
|
+
**DeepEval** is a simple-to-use, open-source LLM evaluation framework, for evaluating and testing large-language model systems. It is similar to Pytest but specialized for unit testing LLM outputs. DeepEval incorporates the latest research to evaluate LLM outputs based on metrics such as G-Eval, task completion, answer relevancy, hallucination, etc., which uses LLM-as-a-judge and other NLP models that run **locally on your machine** for evaluation.
|
|
57
57
|
|
|
58
58
|
Whether your LLM applications are AI agents, RAG pipelines, or chatbots, implemented via LangChain or OpenAI, DeepEval has you covered. With it, you can easily determine the optimal models, prompts, and architecture to improve your RAG pipeline, agentic workflows, prevent prompt drifting, or even transition from OpenAI to hosting your own Deepseek R1 with confidence.
|
|
59
59
|
|
|
@@ -71,7 +71,7 @@ Whether your LLM applications are AI agents, RAG pipelines, or chatbots, impleme
|
|
|
71
71
|
> 🥳 You can now share DeepEval's test results on the cloud directly on [Confident AI](https://confident-ai.com?utm_source=GitHub)
|
|
72
72
|
|
|
73
73
|
- Supports both end-to-end and component-level LLM evaluation.
|
|
74
|
-
- Large variety of ready-to-use LLM evaluation metrics (all with explanations) powered by **ANY** LLM of your choice, statistical methods, or NLP models that runs **locally on your machine**:
|
|
74
|
+
- Large variety of ready-to-use LLM evaluation metrics (all with explanations) powered by **ANY** LLM of your choice, statistical methods, or NLP models that run **locally on your machine**:
|
|
75
75
|
- G-Eval
|
|
76
76
|
- DAG ([deep acyclic graph](https://deepeval.com/docs/metrics-dag))
|
|
77
77
|
- **RAG metrics:**
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__: str = "3.8.1"
|
|
@@ -14,7 +14,7 @@ def send_annotation(
|
|
|
14
14
|
explanation: Optional[str] = None,
|
|
15
15
|
user_id: Optional[str] = None,
|
|
16
16
|
type: Optional[AnnotationType] = AnnotationType.THUMBS_RATING,
|
|
17
|
-
) ->
|
|
17
|
+
) -> None:
|
|
18
18
|
api_annotation = APIAnnotation(
|
|
19
19
|
rating=rating,
|
|
20
20
|
traceUuid=trace_uuid,
|
|
@@ -50,7 +50,7 @@ async def a_send_annotation(
|
|
|
50
50
|
explanation: Optional[str] = None,
|
|
51
51
|
type: Optional[AnnotationType] = AnnotationType.THUMBS_RATING,
|
|
52
52
|
user_id: Optional[str] = None,
|
|
53
|
-
) ->
|
|
53
|
+
) -> None:
|
|
54
54
|
api_annotation = APIAnnotation(
|
|
55
55
|
rating=rating,
|
|
56
56
|
traceUuid=trace_uuid,
|
|
@@ -2937,5 +2937,173 @@ def unset_portkey_model_env(
|
|
|
2937
2937
|
)
|
|
2938
2938
|
|
|
2939
2939
|
|
|
2940
|
+
#############################################
|
|
2941
|
+
# OpenRouter Integration ####################
|
|
2942
|
+
#############################################
|
|
2943
|
+
|
|
2944
|
+
|
|
2945
|
+
@app.command(name="set-openrouter")
|
|
2946
|
+
def set_openrouter_model_env(
|
|
2947
|
+
model: Optional[str] = typer.Option(
|
|
2948
|
+
None,
|
|
2949
|
+
"-m",
|
|
2950
|
+
"--model",
|
|
2951
|
+
help="Model identifier to use for this provider (e.g., `openai/gpt-4.1`).",
|
|
2952
|
+
),
|
|
2953
|
+
prompt_api_key: bool = typer.Option(
|
|
2954
|
+
False,
|
|
2955
|
+
"-k",
|
|
2956
|
+
"--prompt-api-key",
|
|
2957
|
+
help=(
|
|
2958
|
+
"Prompt for OPENROUTER_API_KEY (input hidden). Not suitable for CI. "
|
|
2959
|
+
"If --save (or DEEPEVAL_DEFAULT_SAVE) is used, the key is written to dotenv in plaintext."
|
|
2960
|
+
),
|
|
2961
|
+
),
|
|
2962
|
+
base_url: Optional[str] = typer.Option(
|
|
2963
|
+
None,
|
|
2964
|
+
"-u",
|
|
2965
|
+
"--base-url",
|
|
2966
|
+
help="Override the API endpoint/base URL used by this provider (default: https://openrouter.ai/api/v1).",
|
|
2967
|
+
),
|
|
2968
|
+
temperature: Optional[float] = typer.Option(
|
|
2969
|
+
None,
|
|
2970
|
+
"-t",
|
|
2971
|
+
"--temperature",
|
|
2972
|
+
help="Override the global TEMPERATURE used by LLM providers (e.g., 0.0 for deterministic behavior).",
|
|
2973
|
+
),
|
|
2974
|
+
cost_per_input_token: Optional[float] = typer.Option(
|
|
2975
|
+
None,
|
|
2976
|
+
"-i",
|
|
2977
|
+
"--cost-per-input-token",
|
|
2978
|
+
help=(
|
|
2979
|
+
"USD per input token used for cost tracking. "
|
|
2980
|
+
"If unset and OpenRouter does not return pricing metadata, "
|
|
2981
|
+
"costs will not be calculated."
|
|
2982
|
+
),
|
|
2983
|
+
),
|
|
2984
|
+
cost_per_output_token: Optional[float] = typer.Option(
|
|
2985
|
+
None,
|
|
2986
|
+
"-o",
|
|
2987
|
+
"--cost-per-output-token",
|
|
2988
|
+
help=(
|
|
2989
|
+
"USD per output token used for cost tracking. "
|
|
2990
|
+
"If unset and OpenRouter does not return pricing metadata, "
|
|
2991
|
+
"costs will not be calculated."
|
|
2992
|
+
),
|
|
2993
|
+
),
|
|
2994
|
+
save: Optional[str] = typer.Option(
|
|
2995
|
+
None,
|
|
2996
|
+
"-s",
|
|
2997
|
+
"--save",
|
|
2998
|
+
help="Persist CLI parameters as environment variables in a dotenv file. "
|
|
2999
|
+
"Usage: --save=dotenv[:path] (default: .env.local)",
|
|
3000
|
+
),
|
|
3001
|
+
quiet: bool = typer.Option(
|
|
3002
|
+
False,
|
|
3003
|
+
"-q",
|
|
3004
|
+
"--quiet",
|
|
3005
|
+
help="Suppress printing to the terminal (useful for CI).",
|
|
3006
|
+
),
|
|
3007
|
+
):
|
|
3008
|
+
api_key = None
|
|
3009
|
+
if prompt_api_key:
|
|
3010
|
+
api_key = coerce_blank_to_none(
|
|
3011
|
+
typer.prompt("OpenRouter API key", hide_input=True)
|
|
3012
|
+
)
|
|
3013
|
+
|
|
3014
|
+
model = coerce_blank_to_none(model)
|
|
3015
|
+
base_url = coerce_blank_to_none(base_url)
|
|
3016
|
+
|
|
3017
|
+
settings = get_settings()
|
|
3018
|
+
with settings.edit(save=save) as edit_ctx:
|
|
3019
|
+
edit_ctx.switch_model_provider(ModelKeyValues.USE_OPENROUTER_MODEL)
|
|
3020
|
+
if model is not None:
|
|
3021
|
+
settings.OPENROUTER_MODEL_NAME = model
|
|
3022
|
+
if api_key is not None:
|
|
3023
|
+
settings.OPENROUTER_API_KEY = api_key
|
|
3024
|
+
if base_url is not None:
|
|
3025
|
+
settings.OPENROUTER_BASE_URL = base_url
|
|
3026
|
+
if temperature is not None:
|
|
3027
|
+
settings.TEMPERATURE = temperature
|
|
3028
|
+
if cost_per_input_token is not None:
|
|
3029
|
+
settings.OPENROUTER_COST_PER_INPUT_TOKEN = cost_per_input_token
|
|
3030
|
+
if cost_per_output_token is not None:
|
|
3031
|
+
settings.OPENROUTER_COST_PER_OUTPUT_TOKEN = cost_per_output_token
|
|
3032
|
+
|
|
3033
|
+
handled, path, updates = edit_ctx.result
|
|
3034
|
+
|
|
3035
|
+
effective_model = settings.OPENROUTER_MODEL_NAME
|
|
3036
|
+
if not effective_model:
|
|
3037
|
+
raise typer.BadParameter(
|
|
3038
|
+
"OpenRouter model name is not set. Pass --model (or set OPENROUTER_MODEL_NAME).",
|
|
3039
|
+
param_hint="--model",
|
|
3040
|
+
)
|
|
3041
|
+
|
|
3042
|
+
_handle_save_result(
|
|
3043
|
+
handled=handled,
|
|
3044
|
+
path=path,
|
|
3045
|
+
updates=updates,
|
|
3046
|
+
save=save,
|
|
3047
|
+
quiet=quiet,
|
|
3048
|
+
success_msg=(
|
|
3049
|
+
f":raising_hands: Congratulations! You're now using OpenRouter `{escape(effective_model)}` for all evals that require an LLM."
|
|
3050
|
+
),
|
|
3051
|
+
)
|
|
3052
|
+
|
|
3053
|
+
|
|
3054
|
+
@app.command(name="unset-openrouter")
|
|
3055
|
+
def unset_openrouter_model_env(
|
|
3056
|
+
save: Optional[str] = typer.Option(
|
|
3057
|
+
None,
|
|
3058
|
+
"-s",
|
|
3059
|
+
"--save",
|
|
3060
|
+
help="Remove only the OpenRouter model related environment variables from a dotenv file. "
|
|
3061
|
+
"Usage: --save=dotenv[:path] (default: .env.local)",
|
|
3062
|
+
),
|
|
3063
|
+
clear_secrets: bool = typer.Option(
|
|
3064
|
+
False,
|
|
3065
|
+
"-x",
|
|
3066
|
+
"--clear-secrets",
|
|
3067
|
+
help="Also remove OPENROUTER_API_KEY from the dotenv store.",
|
|
3068
|
+
),
|
|
3069
|
+
quiet: bool = typer.Option(
|
|
3070
|
+
False,
|
|
3071
|
+
"-q",
|
|
3072
|
+
"--quiet",
|
|
3073
|
+
help="Suppress printing to the terminal (useful for CI).",
|
|
3074
|
+
),
|
|
3075
|
+
):
|
|
3076
|
+
settings = get_settings()
|
|
3077
|
+
with settings.edit(save=save) as edit_ctx:
|
|
3078
|
+
settings.USE_OPENROUTER_MODEL = None
|
|
3079
|
+
settings.OPENROUTER_MODEL_NAME = None
|
|
3080
|
+
settings.OPENROUTER_BASE_URL = None
|
|
3081
|
+
settings.OPENROUTER_COST_PER_INPUT_TOKEN = None
|
|
3082
|
+
settings.OPENROUTER_COST_PER_OUTPUT_TOKEN = None
|
|
3083
|
+
# Intentionally do NOT touch TEMPERATURE here; it's a global dial.
|
|
3084
|
+
if clear_secrets:
|
|
3085
|
+
settings.OPENROUTER_API_KEY = None
|
|
3086
|
+
|
|
3087
|
+
handled, path, updates = edit_ctx.result
|
|
3088
|
+
|
|
3089
|
+
if _handle_save_result(
|
|
3090
|
+
handled=handled,
|
|
3091
|
+
path=path,
|
|
3092
|
+
updates=updates,
|
|
3093
|
+
save=save,
|
|
3094
|
+
quiet=quiet,
|
|
3095
|
+
updated_msg="Removed OpenRouter model environment variables from {path}.",
|
|
3096
|
+
tip_msg=None,
|
|
3097
|
+
):
|
|
3098
|
+
if is_openai_configured():
|
|
3099
|
+
print(
|
|
3100
|
+
":raised_hands: OpenAI will still be used by default because OPENAI_API_KEY is set."
|
|
3101
|
+
)
|
|
3102
|
+
else:
|
|
3103
|
+
print(
|
|
3104
|
+
"The OpenRouter model configuration has been removed. No model is currently configured, but you can set one with the CLI or add credentials to .env[.local]."
|
|
3105
|
+
)
|
|
3106
|
+
|
|
3107
|
+
|
|
2940
3108
|
if __name__ == "__main__":
|
|
2941
3109
|
app()
|
|
@@ -106,6 +106,8 @@ class Endpoints(Enum):
|
|
|
106
106
|
EVALUATE_TRACE_ENDPOINT = "/v1/evaluate/traces/:traceUuid"
|
|
107
107
|
EVALUATE_SPAN_ENDPOINT = "/v1/evaluate/spans/:spanUuid"
|
|
108
108
|
|
|
109
|
+
METRICS_ENDPOINT = "/v1/metrics"
|
|
110
|
+
|
|
109
111
|
|
|
110
112
|
class Api:
|
|
111
113
|
def __init__(self, api_key: Optional[str] = None):
|
|
@@ -447,6 +447,9 @@ class Settings(BaseSettings):
|
|
|
447
447
|
AZURE_OPENAI_API_KEY: Optional[SecretStr] = Field(
|
|
448
448
|
None, description="Azure OpenAI API key."
|
|
449
449
|
)
|
|
450
|
+
AZURE_OPENAI_AD_TOKEN: Optional[SecretStr] = Field(
|
|
451
|
+
None, description="Azure OpenAI Ad Token."
|
|
452
|
+
)
|
|
450
453
|
AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = Field(
|
|
451
454
|
None, description="Azure OpenAI endpoint URL."
|
|
452
455
|
)
|
|
@@ -627,6 +630,16 @@ class Settings(BaseSettings):
|
|
|
627
630
|
PORTKEY_PROVIDER_NAME: Optional[str] = Field(
|
|
628
631
|
None, description="Provider name/routing hint for Portkey."
|
|
629
632
|
)
|
|
633
|
+
# OpenRouter
|
|
634
|
+
USE_OPENROUTER_MODEL: Optional[bool] = None
|
|
635
|
+
OPENROUTER_API_KEY: Optional[SecretStr] = None
|
|
636
|
+
OPENROUTER_MODEL_NAME: Optional[str] = None
|
|
637
|
+
OPENROUTER_COST_PER_INPUT_TOKEN: Optional[float] = None
|
|
638
|
+
OPENROUTER_COST_PER_OUTPUT_TOKEN: Optional[float] = None
|
|
639
|
+
OPENROUTER_BASE_URL: Optional[AnyUrl] = Field(
|
|
640
|
+
None, description="OpenRouter base URL (if using a custom endpoint)."
|
|
641
|
+
)
|
|
642
|
+
|
|
630
643
|
# Vertex AI
|
|
631
644
|
VERTEX_AI_MODEL_NAME: Optional[str] = Field(
|
|
632
645
|
None,
|
|
@@ -84,9 +84,11 @@ class EvaluationDataset:
|
|
|
84
84
|
def __init__(
|
|
85
85
|
self,
|
|
86
86
|
goldens: Union[List[Golden], List[ConversationalGolden]] = [],
|
|
87
|
+
confident_api_key: Optional[str] = None,
|
|
87
88
|
):
|
|
88
89
|
self._alias = None
|
|
89
90
|
self._id = None
|
|
91
|
+
self.confident_api_key = confident_api_key
|
|
90
92
|
if len(goldens) > 0:
|
|
91
93
|
self._multi_turn = (
|
|
92
94
|
True if isinstance(goldens[0], ConversationalGolden) else False
|
|
@@ -722,7 +724,7 @@ class EvaluationDataset:
|
|
|
722
724
|
"Unable to push empty dataset to Confident AI, there must be at least one golden in dataset."
|
|
723
725
|
)
|
|
724
726
|
|
|
725
|
-
api = Api()
|
|
727
|
+
api = Api(api_key=self.confident_api_key)
|
|
726
728
|
api_dataset = APIDataset(
|
|
727
729
|
goldens=self.goldens if not self._multi_turn else None,
|
|
728
730
|
conversationalGoldens=(self.goldens if self._multi_turn else None),
|
|
@@ -755,7 +757,7 @@ class EvaluationDataset:
|
|
|
755
757
|
auto_convert_goldens_to_test_cases: bool = False,
|
|
756
758
|
public: bool = False,
|
|
757
759
|
):
|
|
758
|
-
api = Api()
|
|
760
|
+
api = Api(api_key=self.confident_api_key)
|
|
759
761
|
with capture_pull_dataset():
|
|
760
762
|
with Progress(
|
|
761
763
|
SpinnerColumn(style="rgb(106,0,255)"),
|
|
@@ -839,7 +841,7 @@ class EvaluationDataset:
|
|
|
839
841
|
raise ValueError(
|
|
840
842
|
f"Can't queue empty list of goldens to dataset with alias: {alias} on Confident AI."
|
|
841
843
|
)
|
|
842
|
-
api = Api()
|
|
844
|
+
api = Api(api_key=self.confident_api_key)
|
|
843
845
|
|
|
844
846
|
multi_turn = isinstance(goldens[0], ConversationalGolden)
|
|
845
847
|
|
|
@@ -871,7 +873,7 @@ class EvaluationDataset:
|
|
|
871
873
|
self,
|
|
872
874
|
alias: str,
|
|
873
875
|
):
|
|
874
|
-
api = Api()
|
|
876
|
+
api = Api(api_key=self.confident_api_key)
|
|
875
877
|
api.send_request(
|
|
876
878
|
method=HttpMethods.DELETE,
|
|
877
879
|
endpoint=Endpoints.DATASET_ALIAS_ENDPOINT,
|