azure-ai-evaluation 1.5.0__tar.gz → 1.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic; see the release's advisory details for more information.
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/CHANGELOG.md +27 -0
- {azure_ai_evaluation-1.5.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.7.0}/PKG-INFO +49 -3
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/README.md +18 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/TROUBLESHOOTING.md +39 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/__init__.py +10 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_aoai/aoai_grader.py +89 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_aoai/label_grader.py +66 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_aoai/string_check_grader.py +65 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_aoai/text_similarity_grader.py +88 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_azure/_clients.py +4 -4
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_azure/_envs.py +208 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_azure/_token_manager.py +12 -7
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/__init__.py +7 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/evaluation_onedp_client.py +163 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/_client.py +139 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/_configuration.py +73 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/_validation.py +50 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- {azure_ai_evaluation-1.5.0/azure/ai/evaluation/_common/raiclient → azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp}/_version.py +9 -9
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/aio/_client.py +143 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/aio/_configuration.py +75 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/aio/_vendor.py +40 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +39 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4494 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/models/__init__.py +142 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/models/_enums.py +162 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/models/_models.py +2228 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/operations/__init__.py +39 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/operations/_operations.py +5655 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/rai_service.py +165 -34
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/utils.py +79 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_constants.py +16 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_converters/_ai_services.py +162 -118
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_converters/_models.py +76 -6
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_eval_mapping.py +73 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +30 -16
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +8 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +5 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +17 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_evaluate.py +325 -76
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_evaluate/_evaluate_aoai.py +553 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_utils.py +117 -4
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +11 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +9 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +12 -2
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +12 -3
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +12 -3
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +2 -2
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +12 -2
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +14 -4
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +9 -8
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +10 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +10 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +11 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +469 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +10 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +11 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +10 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +11 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +16 -2
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +10 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +11 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_qa/_qa.py +10 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +11 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +20 -2
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +10 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +10 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +10 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +11 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +16 -2
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +86 -12
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +10 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +11 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_exceptions.py +2 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -14
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/_flows.py +1 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +51 -32
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +129 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +6 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +6 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +69 -29
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +19 -1
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_legacy/_common/_async_token_provider.py +124 -0
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +15 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/prompty/_connection.py +11 -74
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/prompty/_prompty.py +119 -9
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/prompty/_utils.py +72 -2
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +114 -22
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_version.py +1 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_attack_strategy.py +1 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_red_team.py +976 -546
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/red_team/_utils/metric_mapping.py +23 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_utils/strategy_utils.py +1 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_adversarial_simulator.py +63 -39
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_constants.py +1 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_conversation/__init__.py +13 -6
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_conversation/_conversation.py +2 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure_ai_evaluation-1.7.0/azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +38 -25
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +43 -28
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +26 -18
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +5 -10
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +65 -41
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +15 -10
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0/azure_ai_evaluation.egg-info}/PKG-INFO +49 -3
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure_ai_evaluation.egg-info/SOURCES.txt +69 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure_ai_evaluation.egg-info/requires.txt +3 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/pyproject.toml +5 -4
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/agent_evaluators/agent_evaluation.ipynb +154 -28
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/agent_evaluators/response_completeness.ipynb +27 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/evaluation_samples_evaluate.py +53 -0
- azure_ai_evaluation-1.7.0/samples/evaluation_samples_evaluate_fdp.py +526 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/evaluation_samples_threshold.py +61 -2
- azure_ai_evaluation-1.7.0/samples/red_team_agent_tool_sample.py +170 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/red_team_samples.py +29 -29
- azure_ai_evaluation-1.7.0/samples/red_team_skip_upload.py +95 -0
- azure_ai_evaluation-1.7.0/samples/semantic_kernel_red_team_agent_sample.py +98 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/setup.py +3 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/conftest.py +59 -7
- azure_ai_evaluation-1.7.0/tests/converters/ai_agent_converter/serialization_helper.py +211 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +101 -9
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/test_adv_simulator.py +23 -18
- azure_ai_evaluation-1.7.0/tests/e2etests/test_aoai_graders.py +198 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/test_builtin_evaluators.py +227 -87
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/test_evaluate.py +2 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/test_lite_management_client.py +3 -3
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/test_mass_evaluate.py +225 -76
- azure_ai_evaluation-1.7.0/tests/e2etests/test_remote_evaluation.py +101 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/test_sim_and_eval.py +52 -56
- azure_ai_evaluation-1.7.0/tests/unittests/test_aoai_integration_features.py +168 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_batch_run_context.py +1 -1
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_completeness_evaluator.py +29 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_content_safety_rai_script.py +2 -3
- azure_ai_evaluation-1.7.0/tests/unittests/test_document_retrieval_evaluator.py +228 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_eval_run.py +1 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_evaluate.py +117 -4
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_redteam/test_formatting_utils.py +9 -9
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_redteam/test_red_team.py +178 -114
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_redteam/test_red_team_result.py +35 -35
- azure_ai_evaluation-1.7.0/tests/unittests/test_remote_evaluation_features.py +66 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_safety_evaluation.py +113 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_save_eval.py +13 -1
- azure_ai_evaluation-1.7.0/tests/unittests/test_tool_call_accuracy_evaluator.py +446 -0
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -99
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +0 -23
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -105
- azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -59
- azure_ai_evaluation-1.5.0/tests/converters/ai_agent_converter/serialization_helper.py +0 -110
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/MANIFEST.in +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_azure/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_azure/_models.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/_experimental.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/constants.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/math.py +0 -0
- {azure_ai_evaluation-1.5.0/azure/ai/evaluation/_common/raiclient → azure_ai_evaluation-1.7.0/azure/ai/evaluation/_common/onedp}/py.typed +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_converters/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_http_utils.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/_configuration.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/_constants.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/_errors.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/_service.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/client.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/entities.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/tracing.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/types.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_adapters/utils.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
- {azure_ai_evaluation-1.5.0/azure/ai/evaluation/_vendor → azure_ai_evaluation-1.7.0/azure/ai/evaluation/_legacy/_common}/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0/azure/ai/evaluation/_legacy/_batch_engine → azure_ai_evaluation-1.7.0/azure/ai/evaluation/_legacy/_common}/_logging.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_model_configurations.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_safety_evaluation/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_user_agent.py +0 -0
- {azure_ai_evaluation-1.5.0/azure/ai/evaluation/simulator/_data_sources → azure_ai_evaluation-1.7.0/azure/ai/evaluation/_vendor}/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/py.typed +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_attack_objective_generator.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_callback_chat_target.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_default_converter.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_red_team_result.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_utils/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_utils/constants.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_utils/formatting_utils.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/red_team/_utils/logging_utils.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_simulator.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/simulator/_utils.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/migration_guide.md +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/README.md +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/agent_evaluators/instructions.md +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/agent_evaluators/intent_resolution.ipynb +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/agent_evaluators/task_adherence.ipynb +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/agent_evaluators/tool_call_accuracy.ipynb +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/agent_evaluators/user_functions.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/data/evaluate_test_data.jsonl +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/evaluation_samples_common.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/evaluation_samples_safety_evaluation.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/samples/evaluation_samples_simulate.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/setup.cfg +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/__openai_patcher.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/target_fn.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/test_metrics_upload.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/e2etests/test_prompty_async.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_agent_evaluators.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_built_in_evaluator.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_content_safety_defect_rate.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_evaluate_performance.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_evaluators/slow_eval.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_evaluators/test_conversation_thresholds.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_evaluators/test_threshold_behavior.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_jailbreak_simulator.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_non_adv_simulator.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_redteam/__init__.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_redteam/test_attack_objective_generator.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_redteam/test_attack_strategy.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_redteam/test_callback_chat_target.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_redteam/test_constants.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_redteam/test_strategy_utils.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_simulator.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
- {azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/tests/unittests/test_utils.py +0 -0
|
@@ -1,5 +1,32 @@
|
|
|
1
1
|
# Release History
|
|
2
2
|
|
|
3
|
+
## 1.7.0 (2025-05-12)
|
|
4
|
+
|
|
5
|
+
### Bugs Fixed
|
|
6
|
+
- azure-ai-evaluation failed with module not found [#40992](https://github.com/Azure/azure-sdk-for-python/issues/40992)
|
|
7
|
+
|
|
8
|
+
## 1.6.0 (2025-05-07)
|
|
9
|
+
|
|
10
|
+
### Features Added
|
|
11
|
+
- New `<evaluator>.binary_aggregate` field added to evaluation result metrics. This field contains the aggregated binary evaluation results for each evaluator, providing a summary of the evaluation outcomes.
|
|
12
|
+
- Added support for Azure OpenAI evaluation via 4 new 'grader' classes, which serve as wrappers around Azure OpenAI grader configurations. These new grader objects can be supplied to the main `evaluate` method as if they were normal callable evaluators. The new classes are:
|
|
13
|
+
- AzureOpenAIGrader (general class for experienced users)
|
|
14
|
+
- AzureOpenAILabelGrader
|
|
15
|
+
- AzureOpenAIStringCheckGrader
|
|
16
|
+
- AzureOpenAITextSimilarityGrader
|
|
17
|
+
|
|
18
|
+
### Breaking Changes
|
|
19
|
+
- In the experimental RedTeam's scan method, the `data_only` param has been replaced with `skip_evals` and if you do not want data to be uploaded, use the `skip_upload` flag.
|
|
20
|
+
|
|
21
|
+
### Bugs Fixed
|
|
22
|
+
- Fixed error in `evaluate` where data fields could not contain numeric characters. Previously, a data file with schema:
|
|
23
|
+
```
|
|
24
|
+
"query1": "some query", "response": "some response"
|
|
25
|
+
```
|
|
26
|
+
throws error when passed into `evaluator_config` as `{"evaluator_name": {"column_mapping": {"query": "${data.query1}", "response": "${data.response}"}},}`.
|
|
27
|
+
Now, users may import data containing fields with numeric characters.
|
|
28
|
+
|
|
29
|
+
|
|
3
30
|
## 1.5.0 (2025-04-04)
|
|
4
31
|
|
|
5
32
|
### Features Added
|
{azure_ai_evaluation-1.5.0/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.7.0}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: azure-ai-evaluation
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: Microsoft Azure Evaluation Library for Python
|
|
5
5
|
Home-page: https://github.com/Azure/azure-sdk-for-python
|
|
6
6
|
Author: Microsoft Corporation
|
|
@@ -30,9 +30,11 @@ Requires-Dist: nltk>=3.9.1
|
|
|
30
30
|
Requires-Dist: azure-storage-blob>=12.10.0
|
|
31
31
|
Requires-Dist: httpx>=0.25.1
|
|
32
32
|
Requires-Dist: pandas<3.0.0,>=2.1.2
|
|
33
|
-
Requires-Dist: openai>=1.
|
|
33
|
+
Requires-Dist: openai>=1.78.0
|
|
34
34
|
Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
|
|
35
35
|
Requires-Dist: msrest>=0.6.21
|
|
36
|
+
Requires-Dist: Jinja2>=3.1.6
|
|
37
|
+
Requires-Dist: aiohttp>=3.0
|
|
36
38
|
Provides-Extra: redteam
|
|
37
39
|
Requires-Dist: pyrit==0.8.1; extra == "redteam"
|
|
38
40
|
|
|
@@ -114,13 +116,23 @@ result = relevance_evaluator(
|
|
|
114
116
|
response="The capital of Japan is Tokyo."
|
|
115
117
|
)
|
|
116
118
|
|
|
117
|
-
#
|
|
119
|
+
# There are two ways to provide Azure AI Project.
|
|
120
|
+
# Option #1 : Using Azure AI Project Details
|
|
118
121
|
azure_ai_project = {
|
|
119
122
|
"subscription_id": "<subscription_id>",
|
|
120
123
|
"resource_group_name": "<resource_group_name>",
|
|
121
124
|
"project_name": "<project_name>",
|
|
122
125
|
}
|
|
123
126
|
|
|
127
|
+
violence_evaluator = ViolenceEvaluator(azure_ai_project)
|
|
128
|
+
result = violence_evaluator(
|
|
129
|
+
query="What is the capital of France?",
|
|
130
|
+
response="Paris."
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
# Option #2 : Using Azure AI Project Url
|
|
134
|
+
azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
|
|
135
|
+
|
|
124
136
|
violence_evaluator = ViolenceEvaluator(azure_ai_project)
|
|
125
137
|
result = violence_evaluator(
|
|
126
138
|
query="What is the capital of France?",
|
|
@@ -271,11 +283,18 @@ with open("simulator_output.jsonl", "w") as f:
|
|
|
271
283
|
```python
|
|
272
284
|
from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
|
|
273
285
|
from azure.identity import DefaultAzureCredential
|
|
286
|
+
|
|
287
|
+
# There are two ways to provide Azure AI Project.
|
|
288
|
+
# Option #1 : Using Azure AI Project
|
|
274
289
|
azure_ai_project = {
|
|
275
290
|
"subscription_id": <subscription_id>,
|
|
276
291
|
"resource_group_name": <resource_group_name>,
|
|
277
292
|
"project_name": <project_name>
|
|
278
293
|
}
|
|
294
|
+
|
|
295
|
+
# Option #2 : Using Azure AI Project Url
|
|
296
|
+
azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
|
|
297
|
+
|
|
279
298
|
scenario = AdversarialScenario.ADVERSARIAL_QA
|
|
280
299
|
simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
|
|
281
300
|
|
|
@@ -381,6 +400,33 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
|
|
|
381
400
|
|
|
382
401
|
# Release History
|
|
383
402
|
|
|
403
|
+
## 1.7.0 (2025-05-12)
|
|
404
|
+
|
|
405
|
+
### Bugs Fixed
|
|
406
|
+
- azure-ai-evaluation failed with module not found [#40992](https://github.com/Azure/azure-sdk-for-python/issues/40992)
|
|
407
|
+
|
|
408
|
+
## 1.6.0 (2025-05-07)
|
|
409
|
+
|
|
410
|
+
### Features Added
|
|
411
|
+
- New `<evaluator>.binary_aggregate` field added to evaluation result metrics. This field contains the aggregated binary evaluation results for each evaluator, providing a summary of the evaluation outcomes.
|
|
412
|
+
- Added support for Azure OpenAI evaluation via 4 new 'grader' classes, which serve as wrappers around Azure OpenAI grader configurations. These new grader objects can be supplied to the main `evaluate` method as if they were normal callable evaluators. The new classes are:
|
|
413
|
+
- AzureOpenAIGrader (general class for experienced users)
|
|
414
|
+
- AzureOpenAILabelGrader
|
|
415
|
+
- AzureOpenAIStringCheckGrader
|
|
416
|
+
- AzureOpenAITextSimilarityGrader
|
|
417
|
+
|
|
418
|
+
### Breaking Changes
|
|
419
|
+
- In the experimental RedTeam's scan method, the `data_only` param has been replaced with `skip_evals` and if you do not want data to be uploaded, use the `skip_upload` flag.
|
|
420
|
+
|
|
421
|
+
### Bugs Fixed
|
|
422
|
+
- Fixed error in `evaluate` where data fields could not contain numeric characters. Previously, a data file with schema:
|
|
423
|
+
```
|
|
424
|
+
"query1": "some query", "response": "some response"
|
|
425
|
+
```
|
|
426
|
+
throws error when passed into `evaluator_config` as `{"evaluator_name": {"column_mapping": {"query": "${data.query1}", "response": "${data.response}"}},}`.
|
|
427
|
+
Now, users may import data containing fields with numeric characters.
|
|
428
|
+
|
|
429
|
+
|
|
384
430
|
## 1.5.0 (2025-04-04)
|
|
385
431
|
|
|
386
432
|
### Features Added
|
|
@@ -76,13 +76,23 @@ result = relevance_evaluator(
|
|
|
76
76
|
response="The capital of Japan is Tokyo."
|
|
77
77
|
)
|
|
78
78
|
|
|
79
|
-
#
|
|
79
|
+
# There are two ways to provide Azure AI Project.
|
|
80
|
+
# Option #1 : Using Azure AI Project Details
|
|
80
81
|
azure_ai_project = {
|
|
81
82
|
"subscription_id": "<subscription_id>",
|
|
82
83
|
"resource_group_name": "<resource_group_name>",
|
|
83
84
|
"project_name": "<project_name>",
|
|
84
85
|
}
|
|
85
86
|
|
|
87
|
+
violence_evaluator = ViolenceEvaluator(azure_ai_project)
|
|
88
|
+
result = violence_evaluator(
|
|
89
|
+
query="What is the capital of France?",
|
|
90
|
+
response="Paris."
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Option #2 : Using Azure AI Project Url
|
|
94
|
+
azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
|
|
95
|
+
|
|
86
96
|
violence_evaluator = ViolenceEvaluator(azure_ai_project)
|
|
87
97
|
result = violence_evaluator(
|
|
88
98
|
query="What is the capital of France?",
|
|
@@ -233,11 +243,18 @@ with open("simulator_output.jsonl", "w") as f:
|
|
|
233
243
|
```python
|
|
234
244
|
from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
|
|
235
245
|
from azure.identity import DefaultAzureCredential
|
|
246
|
+
|
|
247
|
+
# There are two ways to provide Azure AI Project.
|
|
248
|
+
# Option #1 : Using Azure AI Project
|
|
236
249
|
azure_ai_project = {
|
|
237
250
|
"subscription_id": <subscription_id>,
|
|
238
251
|
"resource_group_name": <resource_group_name>,
|
|
239
252
|
"project_name": <project_name>
|
|
240
253
|
}
|
|
254
|
+
|
|
255
|
+
# Option #2 : Using Azure AI Project Url
|
|
256
|
+
azure_ai_project = "https://{resource_name}.services.ai.azure.com/api/projects/{project_name}"
|
|
257
|
+
|
|
241
258
|
scenario = AdversarialScenario.ADVERSARIAL_QA
|
|
242
259
|
simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
|
|
243
260
|
|
|
@@ -6,11 +6,18 @@ This guide walks you through how to investigate failures, common errors in the `
|
|
|
6
6
|
|
|
7
7
|
- [Handle Evaluate API Errors](#handle-evaluate-api-errors)
|
|
8
8
|
- [Troubleshoot Remote Tracking Issues](#troubleshoot-remote-tracking-issues)
|
|
9
|
+
- [Troubleshoot Column Mapping Issues](#troubleshoot-column-mapping-issues)
|
|
9
10
|
- [Troubleshoot Safety Evaluator Issues](#troubleshoot-safety-evaluator-issues)
|
|
11
|
+
- [Troubleshoot Quality Evaluator Issues](#troubleshoot-quality-evaluator-issues)
|
|
10
12
|
- [Handle Simulation Errors](#handle-simulation-errors)
|
|
11
13
|
- [Adversarial Simulation Supported Regions](#adversarial-simulation-supported-regions)
|
|
14
|
+
- [Need to generate simulations for specific harm type](#need-to-generate-simulations-for-specific-harm-type)
|
|
15
|
+
- [Simulator is slow](#simulator-is-slow)
|
|
16
|
+
- [Handle RedTeam Errors](#handle-redteam-errors)
|
|
17
|
+
- [Target resource not found](#target-resource-not-found)
|
|
18
|
+
- [Insufficient Storage Permissions](#insufficient-storage-permissions)
|
|
12
19
|
- [Logging](#logging)
|
|
13
|
-
- [Get
|
|
20
|
+
- [Get Additional Help](#get-additional-help)
|
|
14
21
|
|
|
15
22
|
## Handle Evaluate API Errors
|
|
16
23
|
|
|
@@ -30,11 +37,18 @@ This guide walks you through how to investigate failures, common errors in the `
|
|
|
30
37
|
|
|
31
38
|
- Additionally, if you're using a virtual network or private link, and your evaluation run upload fails because of that, check out this [guide](https://docs.microsoft.com/azure/machine-learning/how-to-enable-studio-virtual-network#access-data-using-the-studio).
|
|
32
39
|
|
|
40
|
+
### Troubleshoot Column Mapping Issues
|
|
41
|
+
|
|
42
|
+
- When using `column_mapping` parameter in evaluators, ensure all keys and values are non-empty strings and contain only alphanumeric characters. Empty strings, non-string values, or non-alphanumeric characters can cause serialization errors and issues in downstream applications. Example of valid mapping: `{"query": "${data.query}", "response": "${data.response}"}`.
|
|
43
|
+
|
|
33
44
|
### Troubleshoot Safety Evaluator Issues
|
|
34
45
|
|
|
35
46
|
- Risk and safety evaluators depend on the Azure AI Studio safety evaluation backend service. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaisafetyeval-regionsupport).
|
|
36
47
|
- If you encounter a 403 Unauthorized error when using safety evaluators, verify that you have the `Contributor` role assigned to your Azure AI project. `Contributor` role is currently required to run safety evaluations.
|
|
37
48
|
|
|
49
|
+
### Troubleshoot Quality Evaluator Issues
|
|
50
|
+
- For `ToolCallAccuracyEvaluator`, if your input did not have a tool to evaluate, the current behavior is to output `null`.
|
|
51
|
+
|
|
38
52
|
## Handle Simulation Errors
|
|
39
53
|
|
|
40
54
|
### Adversarial Simulation Supported Regions
|
|
@@ -51,6 +65,30 @@ The Adversarial simulator does not support selecting individual harms, instead w
|
|
|
51
65
|
Identify the type of simulations being run (adversarial or non-adversarial).
|
|
52
66
|
Adjust parameters such as `api_call_retry_sleep_sec`, `api_call_delay_sec`, and `concurrent_async_task`. Please note that rate limits to llm calls can be both tokens per minute and requests per minute.
|
|
53
67
|
|
|
68
|
+
## Handle RedTeam Errors
|
|
69
|
+
|
|
70
|
+
### Target resource not found
|
|
71
|
+
When initializing an Azure OpenAI model directly as `target` for a `RedTeam` scan, ensure `azure_endpoint` is specified in the format `https://<hub>.openai.azure.com/openai/deployments/<deployment_name>/chat/completions?api-version=2025-01-01-preview`. If using `AzureOpenAI`, `endpoint` should be specified in the format `https://<hub>.openai.azure.com/`.
|
|
72
|
+
|
|
73
|
+
### Insufficient Storage Permissions
|
|
74
|
+
If you see an error like `WARNING: Failed to log artifacts to MLFlow: (UserError) Failed to upload evaluation run to the cloud due to insufficient permission to access the storage`, you need to ensure that proper permissions are assigned to the storage account linked to your Azure AI Project.
|
|
75
|
+
|
|
76
|
+
To fix this issue:
|
|
77
|
+
1. Open the associated resource group being used in your Azure AI Project in the Azure Portal
|
|
78
|
+
2. Look up the storage accounts associated with that resource group
|
|
79
|
+
3. Open each storage account and click on "Access control (IAM)" on the left side navigation
|
|
80
|
+
4. Add permissions for the desired users with the "Storage Blob Data Contributor" role
|
|
81
|
+
|
|
82
|
+
If you have Azure CLI, you can use the following command:
|
|
83
|
+
|
|
84
|
+
```Shell
|
|
85
|
+
# <mySubscriptionID>: Subscription ID of the Azure AI Studio hub's linked storage account (available in Azure AI hub resource view in Azure Portal).
|
|
86
|
+
# <myResourceGroupName>: Resource group of the Azure AI Studio hub's linked storage account.
|
|
87
|
+
# <user-id>: User object ID for role assignment (retrieve with "az ad user show" command).
|
|
88
|
+
|
|
89
|
+
az role assignment create --role "Storage Blob Data Contributor" --scope /subscriptions/<mySubscriptionID>/resourceGroups/<myResourceGroupName> --assignee-principal-type User --assignee-object-id "<user-id>"
|
|
90
|
+
```
|
|
91
|
+
|
|
54
92
|
## Logging
|
|
55
93
|
|
|
56
94
|
You can set logging level via environment variable `PF_LOGGING_LEVEL`, valid values includes `CRITICAL`, `ERROR`, `WARNING`, `INFO`, `DEBUG`, default to `INFO`.
|
|
@@ -31,6 +31,7 @@ from ._evaluators._xpia import IndirectAttackEvaluator
|
|
|
31
31
|
from ._evaluators._code_vulnerability import CodeVulnerabilityEvaluator
|
|
32
32
|
from ._evaluators._ungrounded_attributes import UngroundedAttributesEvaluator
|
|
33
33
|
from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator
|
|
34
|
+
from ._evaluators._document_retrieval import DocumentRetrievalEvaluator
|
|
34
35
|
from ._model_configurations import (
|
|
35
36
|
AzureAIProject,
|
|
36
37
|
AzureOpenAIModelConfiguration,
|
|
@@ -40,6 +41,11 @@ from ._model_configurations import (
|
|
|
40
41
|
Message,
|
|
41
42
|
OpenAIModelConfiguration,
|
|
42
43
|
)
|
|
44
|
+
from ._aoai.aoai_grader import AzureOpenAIGrader
|
|
45
|
+
from ._aoai.label_grader import AzureOpenAILabelGrader
|
|
46
|
+
from ._aoai.string_check_grader import AzureOpenAIStringCheckGrader
|
|
47
|
+
from ._aoai.text_similarity_grader import AzureOpenAITextSimilarityGrader
|
|
48
|
+
|
|
43
49
|
|
|
44
50
|
_patch_all = []
|
|
45
51
|
|
|
@@ -89,6 +95,10 @@ __all__ = [
|
|
|
89
95
|
"CodeVulnerabilityEvaluator",
|
|
90
96
|
"UngroundedAttributesEvaluator",
|
|
91
97
|
"ToolCallAccuracyEvaluator",
|
|
98
|
+
"AzureOpenAIGrader",
|
|
99
|
+
"AzureOpenAILabelGrader",
|
|
100
|
+
"AzureOpenAIStringCheckGrader",
|
|
101
|
+
"AzureOpenAITextSimilarityGrader",
|
|
92
102
|
]
|
|
93
103
|
|
|
94
104
|
__all__.extend([p for p in _patch_all if p not in __all__])
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
"""Wrappers that adapt Azure OpenAI grader configurations for use as evaluators."""


from .aoai_grader import AzureOpenAIGrader

__all__ = [
    "AzureOpenAIGrader",
]
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
|
|
5
|
+
|
|
6
|
+
from azure.ai.evaluation._constants import DEFAULT_AOAI_API_VERSION
|
|
7
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
8
|
+
from typing import Any, Dict, Union
|
|
9
|
+
from azure.ai.evaluation._common._experimental import experimental
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@experimental
class AzureOpenAIGrader:
    """
    Base class for Azure OpenAI grader wrappers, recommended only for use by experienced OpenAI API users.
    Combines a model configuration and any grader configuration
    into a singular object that can be used in evaluations.

    Supplying an AzureOpenAIGrader to the `evaluate` method will cause an asynchronous request to evaluate
    the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
    evaluation results.

    :param model_config: The model configuration to use for the grader.
    :type model_config: Union[
        ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
        ~azure.ai.evaluation.OpenAIModelConfiguration
    ]
    :param grader_config: The grader configuration to use for the grader. This is expected
        to be formatted as a dictionary that matches the specifications of the sub-types of
        the TestingCriterion alias specified in
        `OpenAI's SDK <https://github.com/openai/openai-python/blob/ed53107e10e6c86754866b48f8bd862659134ca8/src/openai/types/eval_create_params.py#L151>`_.
    :type grader_config: Dict[str, Any]
    :param kwargs: Additional keyword arguments to pass to the grader. Pass
        ``validate=False`` to skip configuration validation at construction time.
    :type kwargs: Any
    """

    # Identifier used by the evaluation pipeline to recognize this wrapper type.
    id = "aoai://general"

    def __init__(
        self,
        *,
        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
        grader_config: Dict[str, Any],
        **kwargs: Any,
    ):
        self._model_config = model_config
        self._grader_config = grader_config

        # Validation is on by default but can be opted out of for advanced scenarios.
        if kwargs.get("validate", True):
            self._validate_model_config()
            self._validate_grader_config()

    def _validate_model_config(self) -> None:
        """Validate the model configuration that this grader wrapper is using.

        :raises EvaluationException: If the model configuration lacks a usable api_key.
        """
        # A missing key and a present-but-empty value are both rejected;
        # dict.get covers both cases in one falsy check.
        if not self._model_config.get("api_key"):
            msg = f"{type(self).__name__}: Requires an api_key in the supplied model_config."
            raise EvaluationException(
                message=msg,
                blame=ErrorBlame.USER_ERROR,
                category=ErrorCategory.INVALID_VALUE,
                target=ErrorTarget.AOAI_GRADER,
            )

    def _validate_grader_config(self) -> None:
        """Validate the grader configuration that this grader wrapper is using.

        Currently a no-op; subclasses rely on their typed grader objects for validation.
        """
        return

    def get_client(self) -> Any:
        """Construct an appropriate OpenAI client using this grader's model configuration.
        Returns a slightly different client depending on whether or not this grader's model
        configuration is for Azure OpenAI or OpenAI.

        :return: The OpenAI client.
        :rtype: [~openai.OpenAI, ~openai.AzureOpenAI]
        """
        # The presence of azure_endpoint distinguishes an Azure OpenAI config
        # from a plain OpenAI config.
        if "azure_endpoint" in self._model_config:
            from openai import AzureOpenAI

            # TODO set default values?
            return AzureOpenAI(
                azure_endpoint=self._model_config["azure_endpoint"],
                api_key=self._model_config.get("api_key", None),  # Default-style access to appease linters.
                api_version=DEFAULT_AOAI_API_VERSION,  # Force a known working version
                azure_deployment=self._model_config.get("azure_deployment", ""),
            )
        from openai import OpenAI

        # TODO add default values for base_url and organization?
        return OpenAI(
            api_key=self._model_config["api_key"],
            base_url=self._model_config.get("base_url", ""),
            organization=self._model_config.get("organization", ""),
        )
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
from typing import Any, Dict, Union, List
|
|
5
|
+
|
|
6
|
+
from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
|
|
7
|
+
from openai.types.graders import LabelModelGrader
|
|
8
|
+
from azure.ai.evaluation._common._experimental import experimental
|
|
9
|
+
|
|
10
|
+
from .aoai_grader import AzureOpenAIGrader
|
|
11
|
+
|
|
12
|
+
@experimental
class AzureOpenAILabelGrader(AzureOpenAIGrader):
    """
    Wrapper class for OpenAI's label model graders.

    Supplying a LabelGrader to the `evaluate` method will cause an asynchronous request to evaluate
    the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
    evaluation results.

    :param model_config: The model configuration to use for the grader.
    :type model_config: Union[
        ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
        ~azure.ai.evaluation.OpenAIModelConfiguration
    ]
    :param input: The list of label-based testing criterion for this grader. Individual
        values of this list are expected to be dictionaries that match the format of any of the valid
        `TestingCriterionLabelModelInput <https://github.com/openai/openai-python/blob/ed53107e10e6c86754866b48f8bd862659134ca8/src/openai/types/eval_create_params.py#L125C1-L125C32>`_
        subtypes.
    :type input: List[Dict[str, str]]
    :param labels: A list of strings representing the classification labels of this grader.
    :type labels: List[str]
    :param model: The model to use for the evaluation. Must support structured outputs.
    :type model: str
    :param name: The name of the grader.
    :type name: str
    :param passing_labels: The labels that indicate a passing result. Must be a subset of labels.
    :type passing_labels: List[str]
    :param kwargs: Additional keyword arguments to pass to the grader.
    :type kwargs: Any
    """

    # Identifier used by the evaluation pipeline to recognize this wrapper type.
    id = "aoai://label_model"

    def __init__(
        self,
        *,
        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
        input: List[Dict[str, str]],
        labels: List[str],
        model: str,
        name: str,
        passing_labels: List[str],
        **kwargs: Any,
    ):
        # Build the typed OpenAI grader object; its constructor doubles as
        # validation of the grader-specific configuration.
        grader = LabelModelGrader(
            input=input,
            labels=labels,
            model=model,
            name=name,
            passing_labels=passing_labels,
            type="label_model",
        )
        super().__init__(model_config=model_config, grader_config=grader, **kwargs)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
from typing import Any, Dict, Union
|
|
5
|
+
from typing_extensions import Literal
|
|
6
|
+
|
|
7
|
+
from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
|
|
8
|
+
from openai.types.graders import StringCheckGrader
|
|
9
|
+
from azure.ai.evaluation._common._experimental import experimental
|
|
10
|
+
|
|
11
|
+
from .aoai_grader import AzureOpenAIGrader
|
|
12
|
+
|
|
13
|
+
@experimental
class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):
    """
    Wrapper class for OpenAI's string check graders.

    When passed to the `evaluate` method, this grader triggers an asynchronous
    request that evaluates the grader via the OpenAI API; the grader's results
    are then merged into the standard evaluation results.

    :param model_config: The model configuration to use for the grader.
    :type model_config: Union[
        ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
        ~azure.ai.evaluation.OpenAIModelConfiguration
    ]
    :param input: The input text. This may include template strings.
    :type input: str
    :param name: The name of the grader.
    :type name: str
    :param operation: The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
    :type operation: Literal["eq", "ne", "like", "ilike"]
    :param reference: The reference text. This may include template strings.
    :type reference: str
    :param kwargs: Additional keyword arguments to pass to the grader.
    :type kwargs: Any
    """

    id = "aoai://string_check"

    def __init__(
        self,
        *,
        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
        input: str,
        name: str,
        operation: Literal["eq", "ne", "like", "ilike"],
        reference: str,
        **kwargs: Any
    ):
        # Build the OpenAI grader config; "string_check" is the fixed type tag
        # that identifies this grader kind to the service.
        check_config = StringCheckGrader(
            type="string_check",
            name=name,
            operation=operation,
            input=input,
            reference=reference,
        )
        super().__init__(model_config=model_config, grader_config=check_config, **kwargs)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
from typing import Any, Dict, Union
|
|
5
|
+
from typing_extensions import Literal
|
|
6
|
+
|
|
7
|
+
from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
|
|
8
|
+
from openai.types.graders import TextSimilarityGrader
|
|
9
|
+
from azure.ai.evaluation._common._experimental import experimental
|
|
10
|
+
|
|
11
|
+
from .aoai_grader import AzureOpenAIGrader
|
|
12
|
+
|
|
13
|
+
@experimental
class AzureOpenAITextSimilarityGrader(AzureOpenAIGrader):
    """
    Wrapper class for OpenAI's text similarity graders.

    Supplying a TextSimilarityGrader to the `evaluate` method will cause an asynchronous request to evaluate
    the grader via the OpenAI API. The results of the evaluation will then be merged into the standard
    evaluation results.

    :param model_config: The model configuration to use for the grader.
    :type model_config: Union[
        ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
        ~azure.ai.evaluation.OpenAIModelConfiguration
    ]
    :param evaluation_metric: The evaluation metric to use.
    :type evaluation_metric: Literal[
        "fuzzy_match",
        "bleu",
        "gleu",
        "meteor",
        "rouge_1",
        "rouge_2",
        "rouge_3",
        "rouge_4",
        "rouge_5",
        "rouge_l",
        "cosine",
    ]
    :param input: The text being graded.
    :type input: str
    :param pass_threshold: A float score where a value greater than or equal indicates a passing grade.
    :type pass_threshold: float
    :param reference: The text being graded against.
    :type reference: str
    :param name: The name of the grader.
    :type name: str
    :param kwargs: Additional keyword arguments to pass to the grader.
    :type kwargs: Any
    """

    id = "aoai://text_similarity"

    def __init__(
        self,
        *,
        model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
        evaluation_metric: Literal[
            "fuzzy_match",
            "bleu",
            "gleu",
            "meteor",
            "rouge_1",
            "rouge_2",
            "rouge_3",
            "rouge_4",
            "rouge_5",
            "rouge_l",
            "cosine",
        ],
        input: str,
        pass_threshold: float,
        reference: str,
        name: str,
        **kwargs: Any
    ):
        # Build the OpenAI grader config; "text_similarity" is the fixed type
        # tag that identifies this grader kind to the service.
        grader = TextSimilarityGrader(
            evaluation_metric=evaluation_metric,
            input=input,
            pass_threshold=pass_threshold,
            name=name,
            reference=reference,
            type="text_similarity",
        )
        super().__init__(model_config=model_config, grader_config=grader, **kwargs)
|
{azure_ai_evaluation-1.5.0 → azure_ai_evaluation-1.7.0}/azure/ai/evaluation/_azure/_clients.py
RENAMED
|
@@ -8,12 +8,12 @@ from threading import Lock
|
|
|
8
8
|
from urllib.parse import quote
|
|
9
9
|
from json.decoder import JSONDecodeError
|
|
10
10
|
|
|
11
|
-
from azure.core.credentials import TokenCredential, AzureSasCredential
|
|
11
|
+
from azure.core.credentials import TokenCredential, AzureSasCredential, AccessToken
|
|
12
12
|
from azure.core.rest import HttpResponse
|
|
13
13
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
14
14
|
from azure.ai.evaluation._http_utils import HttpPipeline, get_http_client
|
|
15
15
|
from azure.ai.evaluation._azure._token_manager import AzureMLTokenManager
|
|
16
|
-
from azure.ai.evaluation.
|
|
16
|
+
from azure.ai.evaluation._constants import TokenScope
|
|
17
17
|
from ._models import BlobStoreInfo, Workspace
|
|
18
18
|
|
|
19
19
|
|
|
@@ -61,7 +61,7 @@ class LiteMLClient:
|
|
|
61
61
|
self._token_manager: Optional[AzureMLTokenManager] = None
|
|
62
62
|
self._credential: Optional[TokenCredential] = credential
|
|
63
63
|
|
|
64
|
-
def get_token(self) ->
|
|
64
|
+
def get_token(self) -> AccessToken:
|
|
65
65
|
return self._get_token_manager().get_token()
|
|
66
66
|
|
|
67
67
|
def get_credential(self) -> TokenCredential:
|
|
@@ -201,4 +201,4 @@ class LiteMLClient:
|
|
|
201
201
|
return url
|
|
202
202
|
|
|
203
203
|
def _get_headers(self) -> Dict[str, str]:
|
|
204
|
-
return {"Authorization": f"Bearer {self.get_token()}", "Content-Type": "application/json"}
|
|
204
|
+
return {"Authorization": f"Bearer {self.get_token().token}", "Content-Type": "application/json"}
|