azure-ai-evaluation 1.11.1__tar.gz → 1.11.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/CHANGELOG.md +6 -0
- {azure_ai_evaluation-1.11.1/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.11.2}/PKG-INFO +7 -1
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/utils.py +68 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_base_eval.py +13 -3
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +86 -33
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py +7 -2
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_version.py +1 -1
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2/azure_ai_evaluation.egg-info}/PKG-INFO +7 -1
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_builtin_evaluators.py +0 -1
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_built_in_evaluator.py +2 -24
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluate.py +76 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/MANIFEST.in +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/README.md +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/TROUBLESHOOTING.md +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/aoai_grader.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/label_grader.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/python_grader.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/score_model_grader.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/string_check_grader.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/text_similarity_grader.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/_clients.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/_envs.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/_models.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/_token_manager.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/_experimental.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/constants.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/evaluation_onedp_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/math.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_configuration.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_model_base.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_serialization.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_types.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_utils/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_utils/model_base.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_utils/serialization.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_validation.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_vendor.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/_version.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/_configuration.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/models/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/models/_enums.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/models/_models.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/models/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/py.typed +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/rai_service.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_configuration.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_model_base.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_serialization.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/_version.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/_configuration.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/models/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/models/_enums.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/models/_models.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/models/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/operations/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/operations/_operations.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/operations/_patch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/raiclient/py.typed +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_constants.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_converters/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_converters/_ai_services.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_converters/_models.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_converters/_sk_services.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_eval_mapping.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/code_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_eval_run.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_evaluate.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_evaluate_aoai.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluate/_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_bleu/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_bleu/_bleu.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_coherence/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_coherence/_coherence.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_eci/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_eci/_eci.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_f1_score/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_fluency/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_fluency/_fluency.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_gleu/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_gleu/_gleu.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_meteor/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_meteor/_meteor.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_protected_material/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_qa/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_qa/_qa.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_relevance/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_relevance/_relevance.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_retrieval/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_rouge/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_similarity/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_xpia/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_xpia/xpia.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_exceptions.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_http_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_check.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_configuration.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_constants.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_errors.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_flows.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/_service.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/entities.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/tracing.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/types.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_adapters/utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_config.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_result.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_run.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_status.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_trace.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_common/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_common/_async_token_provider.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_common/_logging.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/_connection.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/_exceptions.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/_prompty.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_model_configurations.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_safety_evaluation/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_user_agent.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/rouge_score/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/py.typed +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_agent/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_agent/_agent_functions.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_agent/_agent_tools.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_agent/_agent_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_attack_objective_generator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_attack_strategy.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_callback_chat_target.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_default_converter.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_evaluation_processor.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_mlflow_integration.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_orchestrator_manager.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_red_team.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_red_team_result.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_result_processor.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/_rai_service_target.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/constants.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/exception_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/file_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/formatting_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/logging_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/metric_mapping.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/progress_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/retry_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/red_team/_utils/strategy_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_adversarial_scenario.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_adversarial_simulator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_constants.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_conversation/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_conversation/constants.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_data_sources/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_data_sources/grounding.json +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_direct_attack_simulator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_helpers/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/_rai_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/_template_handler.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_model_tools/models.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_simulator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/simulator/_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure_ai_evaluation.egg-info/SOURCES.txt +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure_ai_evaluation.egg-info/dependency_links.txt +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure_ai_evaluation.egg-info/not-zip-safe +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure_ai_evaluation.egg-info/requires.txt +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure_ai_evaluation.egg-info/top_level.txt +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/migration_guide.md +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/pyproject.toml +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/README.md +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/agent_evaluation.ipynb +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/instructions.md +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/intent_resolution.ipynb +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/response_completeness.ipynb +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/sample_synthetic_conversations.jsonl +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/task_adherence.ipynb +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/tool_call_accuracy.ipynb +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/agent_evaluators/user_functions.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/aoai_score_model_grader_sample.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/data/custom_objectives_with_context_example.json +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/data/evaluate_test_data.jsonl +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_common.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_evaluate.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_evaluate_fdp.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_safety_evaluation.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_simulate.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/evaluation_samples_threshold.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/red_team_agent_tool_sample.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/red_team_samples.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/red_team_skip_upload.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/samples/semantic_kernel_red_team_agent_sample.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/setup.cfg +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/setup.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/__openai_patcher.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/conftest.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/converters/ai_agent_converter/serialization_helper.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/converters/ai_agent_converter/test_run_ids_from_conversation.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/converters/ai_agent_converter/test_sk_agent_converter_internals.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/converters/ai_agent_converter/test_sk_turn_idxs_from_conversation.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/custom_evaluators/answer_length_with_aggregation.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/target_fn.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_adv_simulator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_aoai_graders.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_evaluate.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_lite_management_client.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_mass_evaluate.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_metrics_upload.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_prompty_async.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_red_team.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_remote_evaluation.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/e2etests/test_sim_and_eval.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_agent_evaluators.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_aoai_alignment_missing_rows.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_aoai_evaluation_pagination.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_aoai_integration_features.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_aoai_python_grader.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_aoai_score_model_grader.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_batch_run_context.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_completeness_evaluator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_content_safety_defect_rate.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_content_safety_rai_script.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_document_retrieval_evaluator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_eval_run.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluate_mismatch.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluate_performance.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluators/slow_eval.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluators/test_conversation_thresholds.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluators/test_inputs_evaluators.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluators/test_service_evaluator_thresholds.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluators/test_threshold_behavior.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_jailbreak_simulator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_lazy_imports.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_non_adv_simulator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/__init__.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_attack_objective_generator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_attack_strategy.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_callback_chat_target.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_constants.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_formatting_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_rai_service_eval_chat_target.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_rai_service_target.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_rai_service_true_false_scorer.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_red_team.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_red_team_language_support.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_red_team_result.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_redteam/test_strategy_utils.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_remote_evaluation_features.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_safety_evaluation.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_save_eval.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_simulator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_synthetic_callback_conv_bot.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_synthetic_conversation_bot.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_tool_call_accuracy_evaluator.py +0 -0
- {azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_utils.py +0 -0
{azure_ai_evaluation-1.11.1/azure_ai_evaluation.egg-info → azure_ai_evaluation-1.11.2}/PKG-INFO
RENAMED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: azure-ai-evaluation
-Version: 1.11.1
+Version: 1.11.2
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation
@@ -413,6 +413,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con

 # Release History

+## 1.11.2 (2025-10-09)
+
+### Bugs Fixed
+
+- **kwargs in an evaluator signature receives input columns that are not otherwise named in the evaluator's signature
+
 ## 1.11.1 (2025-09-17)

 ### Bugs Fixed
```
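The changelog entry above is terse. Below is a minimal, self-contained sketch of the restored behavior, not the SDK's actual implementation: `route_columns` and `my_evaluator` are hypothetical names, and the real batch engine performs this mapping internally when `evaluate()` fans dataset rows out to evaluators.

```python
# Hypothetical sketch of the fixed behavior: when an evaluator declares
# **kwargs, input columns that do not match a named parameter are still
# forwarded instead of being silently dropped.
import inspect

def route_columns(evaluator, row: dict) -> dict:
    sig = inspect.signature(evaluator)
    named = {n for n, p in sig.parameters.items()
             if p.kind in (p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY)}
    if any(p.kind == p.VAR_KEYWORD for p in sig.parameters.values()):
        return dict(row)  # evaluator accepts **kwargs: pass every column through
    return {k: v for k, v in row.items() if k in named}

def my_evaluator(query, response, **kwargs):
    # "metadata" is not named in the signature, so it arrives via **kwargs
    return {"length": len(response), "extra_cols": sorted(kwargs)}

row = {"query": "hi", "response": "hello", "metadata": {"run": 1}}
print(my_evaluator(**route_columns(my_evaluator, row)))
# {'length': 5, 'extra_cols': ['metadata']}
```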
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_common/utils.py
RENAMED

```diff
@@ -659,6 +659,74 @@ def reformat_tool_definitions(tool_definitions, logger=None):
     return tool_definitions


+def simplify_messages(messages, drop_system=True, drop_tool_calls=False, logger=None):
+    """
+    Simplify a list of conversation messages by keeping only role and content.
+    Optionally filter out system messages and/or tool calls.
+
+    :param messages: List of message dicts (e.g., from query or response)
+    :param drop_system: If True, remove system role messages
+    :param drop_tool_calls: If True, remove tool_call items from assistant content
+    :return: New simplified list of messages
+    """
+    if isinstance(messages, str):
+        return messages
+    try:
+        # Validate input is a list
+        if not isinstance(messages, list):
+            return messages
+
+        simplified_msgs = []
+        for msg in messages:
+            # Ensure msg is a dict
+            if not isinstance(msg, dict):
+                simplified_msgs.append(msg)
+                continue
+
+            role = msg.get("role")
+            content = msg.get("content", [])
+
+            # Drop system message (if should)
+            if drop_system and role == "system":
+                continue
+
+            # Simplify user messages
+            if role == "user":
+                simplified_msg = {
+                    "role": role,
+                    "content": _extract_text_from_content(content),
+                }
+                simplified_msgs.append(simplified_msg)
+                continue
+
+            # Drop tool results (if should)
+            if drop_tool_calls and role == "tool":
+                continue
+
+            # Simplify assistant messages
+            if role == "assistant":
+                simplified_content = _extract_text_from_content(content)
+                # Check if message has content
+                if simplified_content:
+                    simplified_msg = {"role": role, "content": simplified_content}
+                    simplified_msgs.append(simplified_msg)
+                    continue
+
+            # Drop tool calls (if should)
+            if drop_tool_calls and any(c.get("type") == "tool_call" for c in content if isinstance(c, dict)):
+                continue
+
+            # If we reach here, it means we want to keep the message
+            simplified_msgs.append(msg)
+
+        return simplified_msgs
+
+    except Exception as ex:
+        if logger:
+            logger.debug(f"Error simplifying messages: {str(ex)}. Returning original messages.")
+        return messages
+
+
 def upload(path: str, container_client: ContainerClient, logger=None):
     """Upload files or directories to Azure Blob Storage using a container client.

```
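For orientation, a usage sketch of the new helper. `azure.ai.evaluation._common.utils` is a private module, so the import path is an implementation detail that may change between releases; the sample messages are invented.

```python
from azure.ai.evaluation._common.utils import simplify_messages  # private module

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": [{"type": "text", "text": "What is in report.pdf?"}]},
    {"role": "assistant", "content": [{"type": "tool_call", "tool_call_id": "c1",
                                       "name": "file_search", "arguments": {}}]},
    {"role": "tool", "tool_call_id": "c1",
     "content": [{"type": "tool_result", "tool_result": []}]},
    {"role": "assistant", "content": [{"type": "text", "text": "It summarizes Q3 revenue."}]},
]

# With drop_system=True (the default) the system turn is removed; with
# drop_tool_calls=True the tool-result turn and the assistant turn whose only
# content is a tool call are removed, leaving the question and the text answer.
simplified = simplify_messages(messages, drop_tool_calls=True)
```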
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_common/_base_eval.py
RENAMED

```diff
@@ -37,6 +37,8 @@ from azure.ai.evaluation._common._experimental import experimental

 from ._conversation_aggregators import GetAggregator, GetAggregatorType

+import copy
+
 P = ParamSpec("P")
 T = TypeVar("T")
 T_EvalValue = TypeVar("T_EvalValue")
@@ -486,8 +488,12 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
         """
         tool_calls = []
         tool_results_map = {}
-        if isinstance(response, list):
-            for message in response:
+
+        # Work on a deep copy to avoid modifying the original object
+        response_copy = copy.deepcopy(response)
+
+        if isinstance(response_copy, list):
+            for message in response_copy:
                 # Extract tool calls from assistant messages
                 if message.get("role") == "assistant" and isinstance(message.get("content"), list):
                     for content_item in message.get("content"):
@@ -519,7 +525,11 @@ class EvaluatorBase(ABC, Generic[T_EvalValue]):
         :rtype: Union[DoEvalResult[T_EvalValue], AggregateResult[T_EvalValue]]
         """
         # Convert inputs into list of evaluable inputs.
-        eval_input_list = self._convert_kwargs_to_eval_input(**kwargs)
+        try:
+            eval_input_list = self._convert_kwargs_to_eval_input(**kwargs)
+        except Exception as e:
+            print(f"Error converting kwargs to eval_input_list: {e}")
+            raise e
         per_turn_results = []
         # Evaluate all inputs.
         for eval_input in eval_input_list:
```
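The new `copy.deepcopy(response)` is a defensive copy: tool-call parsing should not mutate the caller's conversation. A toy illustration of the aliasing hazard it guards against, independent of the SDK:

```python
import copy

def tag_messages(response, defensive: bool):
    # Stand-in for parsing logic that edits messages as it walks them.
    messages = copy.deepcopy(response) if defensive else response
    for message in messages:
        message["seen"] = True  # in-place edit
    return messages

original = [{"role": "assistant", "content": []}]
tag_messages(original, defensive=False)
print("seen" in original[0])  # True  -> caller's data was mutated

original = [{"role": "assistant", "content": []}]
tag_messages(original, defensive=True)
print("seen" in original[0])  # False -> the deep copy isolates the caller
```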
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
import os, logging
|
|
5
|
-
from typing import Dict, List, Optional, Union
|
|
5
|
+
from typing import Dict, List, Optional, Union, Any, Tuple
|
|
6
6
|
|
|
7
7
|
from typing_extensions import overload, override
|
|
8
8
|
from azure.ai.evaluation._legacy._adapters._flows import AsyncPrompty
|
|
@@ -16,6 +16,7 @@ from ..._common.utils import (
|
|
|
16
16
|
ErrorCategory,
|
|
17
17
|
construct_prompty_model_config,
|
|
18
18
|
validate_model_config,
|
|
19
|
+
simplify_messages,
|
|
19
20
|
)
|
|
20
21
|
|
|
21
22
|
try:
|
|
@@ -207,6 +208,42 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
|
|
|
207
208
|
|
|
208
209
|
return super().__call__(*args, **kwargs)
|
|
209
210
|
|
|
211
|
+
def _has_context(self, eval_input: dict) -> bool:
|
|
212
|
+
"""
|
|
213
|
+
Return True if eval_input contains a non-empty 'context' field.
|
|
214
|
+
Treats None, empty strings, empty lists, and lists of empty strings as no context.
|
|
215
|
+
"""
|
|
216
|
+
context = eval_input.get("context", None)
|
|
217
|
+
if not context:
|
|
218
|
+
return False
|
|
219
|
+
if context == "<>": # Special marker for no context
|
|
220
|
+
return False
|
|
221
|
+
if isinstance(context, list):
|
|
222
|
+
return any(str(c).strip() for c in context)
|
|
223
|
+
if isinstance(context, str):
|
|
224
|
+
return bool(context.strip())
|
|
225
|
+
return True
|
|
226
|
+
|
|
227
|
+
@override
|
|
228
|
+
async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:
|
|
229
|
+
if "query" not in eval_input:
|
|
230
|
+
return await super()._do_eval(eval_input)
|
|
231
|
+
|
|
232
|
+
contains_context = self._has_context(eval_input)
|
|
233
|
+
|
|
234
|
+
simplified_query = simplify_messages(eval_input["query"], drop_tool_calls=contains_context)
|
|
235
|
+
simplified_response = simplify_messages(eval_input["response"], drop_tool_calls=False)
|
|
236
|
+
|
|
237
|
+
# Build simplified input
|
|
238
|
+
simplified_eval_input = {
|
|
239
|
+
"query": simplified_query,
|
|
240
|
+
"response": simplified_response,
|
|
241
|
+
"context": eval_input["context"],
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
# Replace and call the parent method
|
|
245
|
+
return await super()._do_eval(simplified_eval_input)
|
|
246
|
+
|
|
210
247
|
async def _real_call(self, **kwargs):
|
|
211
248
|
"""The asynchronous call where real end-to-end evaluation logic is performed.
|
|
212
249
|
|
|
@@ -230,57 +267,73 @@ class GroundednessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
             raise ex

     def _convert_kwargs_to_eval_input(self, **kwargs):
-        if "context"
+        if kwargs.get("context") or kwargs.get("conversation"):
             return super()._convert_kwargs_to_eval_input(**kwargs)
-
         query = kwargs.get("query")
         response = kwargs.get("response")
         tool_definitions = kwargs.get("tool_definitions")

-        if not query or not response or not tool_definitions:
-            msg = f"{type(self).__name__}: Either 'conversation' or individual inputs must be provided. For Agent groundedness 'query'
+        if (not query) or (not response): # or not tool_definitions:
+            msg = f"{type(self).__name__}: Either 'conversation' or individual inputs must be provided. For Agent groundedness 'query' and 'response' are required."
             raise EvaluationException(
                 message=msg,
                 blame=ErrorBlame.USER_ERROR,
                 category=ErrorCategory.INVALID_VALUE,
                 target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
             )
-
         context = self._get_context_from_agent_response(response, tool_definitions)
-        if not context:
-            raise EvaluationException(
-                message=f"Context could not be extracted from agent response. Supported tools for groundedness are {self._SUPPORTED_TOOLS}. If supported tools are not used groundedness is not calculated.",
-                blame=ErrorBlame.USER_ERROR,
-                category=ErrorCategory.NOT_APPLICABLE,
-                target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
-            )

-
+        filtered_response = self._filter_file_search_results(response)
+        return super()._convert_kwargs_to_eval_input(response=filtered_response, context=context, query=query)
+
+    def _filter_file_search_results(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Filter out file_search tool results from the messages."""
+        file_search_ids = self._get_file_search_tool_call_ids(messages)
+        return [
+            msg for msg in messages if not (msg.get("role") == "tool" and msg.get("tool_call_id") in file_search_ids)
+        ]

     def _get_context_from_agent_response(self, response, tool_definitions):
+        """Extract context text from file_search tool results in the agent response."""
+        NO_CONTEXT = "<>"
         context = ""
         try:
             logger.debug("Extracting context from response")
             tool_calls = self._parse_tools_from_response(response=response)
-            logger.debug(f"Tool Calls parsed successfully
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            logger.debug(f"Tool Calls parsed successfully: {tool_calls}")
+
+            if not tool_calls:
+                return NO_CONTEXT
+
+            context_lines = []
+            for tool_call in tool_calls:
+                if not isinstance(tool_call, dict) or tool_call.get("type") != "tool_call":
+                    continue
+
+                tool_name = tool_call.get("name")
+                if tool_name != "file_search":
+                    continue
+
+                # Extract tool results
+                for result in tool_call.get("tool_result", []):
+                    results = result if isinstance(result, list) else [result]
+                    for r in results:
+                        file_name = r.get("file_name", "Unknown file name")
+                        for content in r.get("content", []):
+                            text = content.get("text")
+                            if text:
+                                context_lines.append(f"{file_name}:\n- {text}---\n\n")
+
+            context = "\n".join(context_lines) if len(context_lines) > 0 else None
+
         except Exception as ex:
             logger.debug(f"Error extracting context from agent response : {str(ex)}")
-            context =
+            context = None
+
+        context = context if context else NO_CONTEXT
+        return context

-
+    def _get_file_search_tool_call_ids(self, query_or_response):
+        """Return a list of tool_call_ids for file search tool calls."""
+        tool_calls = self._parse_tools_from_response(query_or_response)
+        return [tc.get("tool_call_id") for tc in tool_calls if tc.get("name") == "file_search"]

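The extraction above walks a specific nesting: parsed tool calls, then tool_result entries (which may themselves be lists), then per-file content parts. Below is a condensed, self-contained sketch of that walk; the input shape is an assumption for demonstration, since the real one comes from _parse_tools_from_response.

from typing import Any, Dict, List

NO_CONTEXT = "<>"  # sentinel returned when nothing grounds the response

def context_from_tool_calls(tool_calls: List[Dict[str, Any]]) -> str:
    """Collect file_search result text into a context string, or return the sentinel."""
    lines: List[str] = []
    for call in tool_calls:
        if not isinstance(call, dict) or call.get("type") != "tool_call":
            continue
        if call.get("name") != "file_search":
            continue  # only file_search results are treated as grounding context
        for result in call.get("tool_result", []):
            # A single tool_result entry may be one dict or a list of dicts.
            for r in result if isinstance(result, list) else [result]:
                file_name = r.get("file_name", "Unknown file name")
                for part in r.get("content", []):
                    if part.get("text"):
                        lines.append(f"{file_name}:\n- {part['text']}---\n\n")
    return "\n".join(lines) if lines else NO_CONTEXT

calls = [{
    "type": "tool_call",
    "name": "file_search",
    "tool_call_id": "call_1",
    "tool_result": [[{"file_name": "bakery_menu.txt",
                      "content": [{"text": "Croissant au Beurre"}]}]],
}]
print(context_from_tool_calls(calls))  # prints the bakery_menu.txt snippet formatted as context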
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty
RENAMED
@@ -32,52 +32,53 @@ system:

 user:
 # Definition
-**Groundedness** refers to how well an answer is anchored in the provided context, evaluating its relevance, accuracy, and completeness based exclusively on that context. It assesses the extent to which the answer directly and fully addresses the question without introducing unrelated or incorrect information.
+**Groundedness** refers to how well an answer is anchored in the provided context, evaluating its relevance, accuracy, and completeness based exclusively on that context. It assesses the extent to which the answer directly and fully addresses the question without introducing unrelated or incorrect information.
+
+> Context is the source of truth for evaluating the response. If it's empty, rely on the tool results in the response and query.
+> Evaluate the groundedness of the response message, not the chat history.

 # Ratings
 ## [Groundedness: 1] (Completely Unrelated Response)
-**Definition:** An answer that does not relate to the question or the context in any way.
+**Definition:** An answer that does not relate to the question or the context in any way.
+- Does not relate to the question or context at all.
+- Talks about the general topic but does not respond to the query.

 **Examples:**
 **Context:** The company's annual meeting will be held next Thursday.
 **Query:** When is the company's annual meeting?
 **Response:** I enjoy hiking in the mountains during summer.

-**Context:** The new policy aims to reduce carbon emissions by 20% over the next five years.
-**Query:** What is the goal of the new policy?
-**Response:** My favorite color is blue.
-
-## [Groundedness: 2] (Related Topic but Does Not Respond to the Query)
-**Definition:** An answer that relates to the general topic of the context but does not answer the specific question asked. It may mention concepts from the context but fails to provide a direct or relevant response.
-
-**Examples:**
 **Context:** The museum will exhibit modern art pieces from various local artists.
 **Query:** What kind of art will be exhibited at the museum?
 **Response:** Museums are important cultural institutions.

-
-
-**Response:** Software updates can sometimes fix bugs.
-
-## [Groundedness: 3] (Attempts to Respond but Contains Incorrect Information)
-**Definition:** An answer that attempts to respond to the question but includes incorrect information not supported by the context. It may misstate facts, misinterpret the context, or provide erroneous details.
+## [Groundedness: 2] (Attempts to Respond but Contains Incorrect Information)
+**Definition:** An answer that attempts to respond to the question but includes incorrect information not supported by the context. It may misstate facts, misinterpret the context, or provide erroneous details. Even if some points are correct, the presence of inaccuracies makes the response unreliable.

 **Examples:**
-**Context:** The festival starts on June 5th and features international musicians.
+**Context:** - The festival starts on June 5th and features international musicians.
 **Query:** When does the festival start?
 **Response:** The festival starts on July 5th and features local artists.

-**Context:**
-**Query:**
-**Response:**
+**Context:** bakery_menu.txt: - Croissant au Beurre — flaky, buttery croissant
+**Query:** [{"role":"user","content":"Are there croissants?"}]
+**Response:** [{"role":"assistant","content":"Yes, Croissant au Beurre is on the menu, served with jam."}]
+
+## [Groundedness: 3] (Nothing to be Grounded)
+Definition: An answer that does not provide any information that can be evaluated against the context. This includes responses that are asking for clarification, providing polite fillers, or follow-up questions.
+
+**Examples:**
+**Context:**
+**Query:** [{"role":"user","content":"How many eggs are needed for the recipe?"}, {"role":"tool","content":"tool_result": [{"file_name": "recipe.txt", "content": "The recipe requires two eggs and one cup of milk."}]}, {"role":"assistant","content":"You need three eggs for the recipe."}, {"role":"user","content":"Thank you."}]
+**Response:** [{"role":"assistant","content":"You're welcome, anything else I can help with?"}]

 ## [Groundedness: 4] (Partially Correct Response)
 **Definition:** An answer that provides a correct response to the question but is incomplete or lacks specific details mentioned in the context. It captures some of the necessary information but omits key elements needed for a full understanding.

 **Examples:**
-**Context:** The bookstore offers a 15% discount to students and a 10% discount to senior citizens.
-**Query:** What discount does the bookstore offer to students?
-**Response:**
+**Context:** - store_details.txt: The bookstore offers a 15% discount to students and a 10% discount to senior citizens.
+**Query:** [{"role":"user","content":"What discount does the bookstore offer to students, if any?"}]
+**Response:** [{"role":"assistant","content":"Yes, students get a discount at the bookstore."}]

 **Context:** The company's headquarters are located in Berlin, Germany.
 **Query:** Where are the company's headquarters?
@@ -87,13 +88,13 @@ user:
 **Definition:** An answer that thoroughly and accurately responds to the question, including all relevant details from the context. It directly addresses the question with precise information, demonstrating complete understanding without adding extraneous information.

 **Examples:**
-**
-**
-**
+**CONTEXT:** The author released her latest novel, 'The Silent Echo', on September 1st.
+**QUERY:** [{"role":"user","content":"When was 'The Silent Echo' released?"}]
+**RESPONSE:** [{"role":"assistant","content":"The 'Silent Echo' was released on September 1st."}]

-**Context:**
+**Context:**
 **Query:** By what date must participants register to receive early bird pricing?
-**Response:** Participants must register by May 31st to receive early bird pricing.
+**Response:** [{"role":"tool","content":"tool_result": [{"file_name": "store_guidelines.txt", "content": "Participants registering before and including May 31st will be eligible for early bird pricing."}]}, {"role":"assistant","content":"Participants must register by May 31st to receive early bird pricing."}]


 # Data
@@ -103,7 +104,7 @@ RESPONSE: {{response}}


 # Tasks
-## Please provide your assessment Score for the previous RESPONSE in relation to the CONTEXT and
+## Please provide your assessment Score for the previous RESPONSE message in relation to the CONTEXT, QUERY and RESPONSE tools based on the Definitions above. Your output should include the following information:
 - **ThoughtChain**: To improve the reasoning process, think step by step and include a step-by-step explanation of your thought process as you analyze the data based on the definitions. Keep it brief and start your ThoughtChain with "Let's think step by step:".
 - **Explanation**: a very short explanation of why you think the input Data should get that Score.
 - **Score**: based on your previous analysis, provide your Score. The Score you give MUST be a integer score (i.e., "1", "2"...) based on the levels of the definitions.

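The prompty's new message-list examples mirror how the evaluator is driven from Python. A hedged sketch of the classic context-based call follows; the endpoint, deployment, and key values are placeholders, and agent-style calls instead pass message lists plus tool_definitions, with context then extracted from file_search results as shown earlier.

from azure.ai.evaluation import AzureOpenAIModelConfiguration, GroundednessEvaluator

# Placeholder connection details; substitute your own resource values.
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<your-resource>.openai.azure.com",
    azure_deployment="<your-deployment>",
    api_key="<your-api-key>",
)
evaluator = GroundednessEvaluator(model_config=model_config)

# Traditional call: explicit context is the source of truth.
result = evaluator(
    query="Are there croissants?",
    response="Yes, Croissant au Beurre is on the menu.",
    context="bakery_menu.txt: - Croissant au Beurre, a flaky, buttery croissant",
)
print(result["groundedness"], result["groundedness_reason"])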
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_legacy/_batch_engine/_engine.py
RENAMED
@@ -344,8 +344,13 @@ class BatchEngine:

         func_params = inspect.signature(self._func).parameters

-
-
+        has_kwargs = any(p.kind == p.VAR_KEYWORD for p in func_params.values())
+
+        if has_kwargs:
+            return inputs
+        else:
+            filtered_params = {key: value for key, value in inputs.items() if key in func_params}
+            return filtered_params

     async def _exec_line_async(
         self,

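The change turns on inspect.Parameter.VAR_KEYWORD: when the wrapped evaluator declares **kwargs, every input column is forwarded; otherwise inputs are filtered down to the named parameters. A standalone sketch of the same check (the function names here are illustrative, not the BatchEngine's own):

import inspect
from typing import Any, Callable, Dict

def select_inputs(func: Callable[..., Any], inputs: Dict[str, Any]) -> Dict[str, Any]:
    """Forward everything if func takes **kwargs, else keep only named params."""
    params = inspect.signature(func).parameters
    if any(p.kind == p.VAR_KEYWORD for p in params.values()):
        return inputs  # **kwargs catches everything, so nothing is dropped
    return {key: value for key, value in inputs.items() if key in params}

def named_only(query, response):
    return query, response

def with_kwargs(query, **kwargs):
    return query, kwargs

row = {"query": "q", "response": "r", "ground_truth": "g"}
print(select_inputs(named_only, row))   # {'query': 'q', 'response': 'r'}
print(select_inputs(with_kwargs, row))  # full row; ground_truth lands in kwargs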
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2/azure_ai_evaluation.egg-info}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: azure-ai-evaluation
-Version: 1.11.1
+Version: 1.11.2
 Summary: Microsoft Azure Evaluation Library for Python
 Home-page: https://github.com/Azure/azure-sdk-for-python
 Author: Microsoft Corporation
@@ -413,6 +413,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con

 # Release History

+## 1.11.2 (2025-10-09)
+
+### Bugs Fixed
+
+- **kwargs in an evaluator signature receives input columns that are not otherwise named in the evaluator's signature
+
 ## 1.11.1 (2025-09-17)

 ### Bugs Fixed

{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_built_in_evaluator.py
RENAMED
@@ -201,27 +201,6 @@ class TestBuiltInEvaluators:
         assert result["groundedness"] == result["gpt_groundedness"] == 1
         assert "groundedness_reason" in result

-    def test_groundedness_evaluator_no_supported_tools(self, mock_model_config):
-        """Test GroundednessEvaluator when no supported tools are used"""
-        groundedness_eval = GroundednessEvaluator(model_config=mock_model_config)
-        groundedness_eval._flow = MagicMock(return_value=quality_response_async_mock())
-
-        result = groundedness_eval(
-            query="What is the capital of Japan?",
-            response=[
-                {"role": "user", "content": "What is the capital of Japan?"},
-                {"role": "assistant", "content": "The capital of Japan is Tokyo."},
-            ],
-            tool_definitions=[
-                {"name": "unsupported_tool", "type": "unsupported", "description": "An unsupported tool"}
-            ],
-        )
-
-        # When no supported tools are used, it should return "not applicable" result
-        assert result["groundedness"] == "not applicable"
-        assert result["groundedness_result"] == "pass"
-        assert "Supported tools for groundedness are" in result["groundedness_reason"]
-
     def test_groundedness_evaluator_with_context(self, mock_model_config):
         """Test GroundednessEvaluator with direct context (traditional use)"""
         groundedness_eval = GroundednessEvaluator(model_config=mock_model_config)
@@ -244,11 +223,10 @@ class TestBuiltInEvaluators:
         with pytest.raises(EvaluationException) as exc_info:
             groundedness_eval(
                 query="What is the capital of Japan?",
-
-                # Missing tool_definitions
+                # Missing response
             )

         assert (
-            "Either 'conversation' or individual inputs must be provided. For Agent groundedness 'query'
+            "Either 'conversation' or individual inputs must be provided. For Agent groundedness 'query' and 'response' are required."
             in exc_info.value.args[0]
         )

{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/tests/unittests/test_evaluate.py
RENAMED
@@ -975,6 +975,82 @@ class TestEvaluate:
         assert result[EvaluationRunProperties.NAME_MAP_LENGTH] == -1
         assert len(result) == 1

+    def test_evaluate_evaluator_only_kwargs_param(self, evaluate_test_data_jsonl_file):
+        """Validate that an evaluator with only an **kwargs param receives all input in kwargs."""
+
+        def evaluator(**kwargs):
+            return locals()
+
+        result = evaluate(data=evaluate_test_data_jsonl_file, evaluators={"test": evaluator})
+
+        assert len(result["rows"]) == 3
+
+        assert {"query", "response", "ground_truth", "context"}.issubset(result["rows"][0]["outputs.test.kwargs"])
+        assert {"query", "response", "ground_truth", "context"}.issubset(result["rows"][1]["outputs.test.kwargs"])
+        assert {"query", "response", "ground_truth", "context"}.issubset(result["rows"][2]["outputs.test.kwargs"])
+
+    def test_evaluate_evaluator_kwargs_param(self, evaluate_test_data_jsonl_file):
+        """Validate that an evaluator with named parameters and **kwargs obeys python function call semantics."""
+
+        def evaluator(query, response, *, bar=None, **kwargs):
+            return locals()
+
+        result = evaluate(data=evaluate_test_data_jsonl_file, evaluators={"test": evaluator})
+
+        assert len(result["rows"]) == 3
+
+        row1_kwargs = result["rows"][0]["outputs.test.kwargs"]
+        row2_kwargs = result["rows"][1]["outputs.test.kwargs"]
+        row3_kwargs = result["rows"][2]["outputs.test.kwargs"]
+
+        assert {"ground_truth", "context"}.issubset(row1_kwargs), "Unnamed parameters should be in kwargs"
+        assert {"query", "response", "bar"}.isdisjoint(row1_kwargs), "Named parameters should not be in kwargs"
+
+        assert {"ground_truth", "context"}.issubset(row2_kwargs), "Unnamed parameters should be in kwargs"
+        assert {"query", "response", "bar"}.isdisjoint(row2_kwargs), "Named parameters should not be in kwargs"
+
+        assert {"ground_truth", "context"}.issubset(row3_kwargs), "Unnamed parameters should be in kwargs"
+        assert {"query", "response", "bar"}.isdisjoint(row3_kwargs), "Named parameters should not be in kwargs"
+
+    def test_evaluate_evaluator_kwargs_param_column_mapping(self, evaluate_test_data_jsonl_file):
+        """Validate that an evaluator with kwargs can receive column mapped values."""
+
+        def evaluator(query, response, *, bar=None, **kwargs):
+            return locals()
+
+        result = evaluate(
+            data=evaluate_test_data_jsonl_file,
+            evaluators={"test": evaluator},
+            evaluator_config={
+                "default": {
+                    "column_mapping": {
+                        "query": "${data.query}",
+                        "response": "${data.response}",
+                        "foo": "${data.context}",
+                        "bar": "${data.ground_truth}",
+                    }
+                }
+            },
+        )
+
+        assert len(result["rows"]) == 3
+
+        row1_kwargs = result["rows"][0]["outputs.test.kwargs"]
+        row2_kwargs = result["rows"][1]["outputs.test.kwargs"]
+        row3_kwargs = result["rows"][2]["outputs.test.kwargs"]
+
+        assert {"ground_truth", "context"}.issubset(row1_kwargs), "Unnamed parameters should be in kwargs"
+        assert "foo" in row1_kwargs, "Making a column mapping to an unnamed parameter should appear in kwargs"
+        assert {"query", "response", "bar"}.isdisjoint(row1_kwargs), "Named parameters should not be in kwargs"
+
+        assert {"ground_truth", "context"}.issubset(row2_kwargs), "Unnamed parameters should be in kwargs"
+        assert "foo" in row2_kwargs, "Making a column mapping to an unnamed parameter should appear in kwargs"
+        assert {"query", "response", "bar"}.isdisjoint(row2_kwargs), "Named parameters should not be in kwargs"
+
+        assert {"ground_truth", "context"}.issubset(row3_kwargs), "Unnamed parameters should be in kwargs"
+        assert "foo" in row3_kwargs, "Making a column mapping to an unnamed parameter should appear in kwargs"
+        assert {"query", "response", "bar"}.isdisjoint(row3_kwargs), "Named parameters should not be in kwargs"
+

 @pytest.mark.unittest
 class TestTagsInLoggingFunctions:

{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/__init__.py
RENAMED
File without changes
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/aoai_grader.py
RENAMED
File without changes
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/label_grader.py
RENAMED
File without changes
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_aoai/python_grader.py
RENAMED
File without changes
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/__init__.py
RENAMED
File without changes
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/_clients.py
RENAMED
File without changes
{azure_ai_evaluation-1.11.1 → azure_ai_evaluation-1.11.2}/azure/ai/evaluation/_azure/_envs.py
RENAMED
File without changes