azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# pylint: disable=too-many-instance-attributes
|
|
6
|
+
# pylint: disable=too-many-locals
|
|
7
|
+
# pylint: disable=line-too-long
|
|
8
|
+
|
|
9
|
+
from typing import Dict, List, NamedTuple, Optional, Union
|
|
10
|
+
from msrest.serialization import Model
|
|
11
|
+
from azure.core.credentials import AzureSasCredential, TokenCredential
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BlobStoreInfo(NamedTuple):
|
|
15
|
+
name: str
|
|
16
|
+
account_name: str
|
|
17
|
+
endpoint: str
|
|
18
|
+
container_name: str
|
|
19
|
+
credential: Optional[Union[AzureSasCredential, TokenCredential, str]]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class WorkspaceHubConfig(Model):
|
|
23
|
+
"""WorkspaceHub's configuration object."""
|
|
24
|
+
|
|
25
|
+
_attribute_map = {
|
|
26
|
+
"additional_workspace_storage_accounts": {"key": "additionalWorkspaceStorageAccounts", "type": "[str]"},
|
|
27
|
+
"default_workspace_resource_group": {"key": "defaultWorkspaceResourceGroup", "type": "str"},
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
def __init__(
|
|
31
|
+
self,
|
|
32
|
+
*,
|
|
33
|
+
additional_workspace_storage_accounts: Optional[List[str]] = None,
|
|
34
|
+
default_workspace_resource_group: Optional[str] = None,
|
|
35
|
+
**kwargs
|
|
36
|
+
):
|
|
37
|
+
super(WorkspaceHubConfig, self).__init__(**kwargs)
|
|
38
|
+
self.additional_workspace_storage_accounts = additional_workspace_storage_accounts
|
|
39
|
+
self.default_workspace_resource_group = default_workspace_resource_group
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Workspace(Model):
|
|
43
|
+
"""An object that represents a machine learning workspace.
|
|
44
|
+
|
|
45
|
+
Variables are only populated by the server, and will be ignored when sending a request."""
|
|
46
|
+
|
|
47
|
+
_validation = {
|
|
48
|
+
"id": {"readonly": True},
|
|
49
|
+
"name": {"readonly": True},
|
|
50
|
+
"type": {"readonly": True},
|
|
51
|
+
#'system_data': {'readonly': True},
|
|
52
|
+
"agents_endpoint_uri": {"readonly": True},
|
|
53
|
+
"ml_flow_tracking_uri": {"readonly": True},
|
|
54
|
+
#'notebook_info': {'readonly': True},
|
|
55
|
+
# "private_endpoint_connections": {"readonly": True},
|
|
56
|
+
#'private_link_count': {'readonly': True},
|
|
57
|
+
"provisioning_state": {"readonly": True},
|
|
58
|
+
"service_provisioned_resource_group": {"readonly": True},
|
|
59
|
+
"storage_hns_enabled": {"readonly": True},
|
|
60
|
+
"tenant_id": {"readonly": True},
|
|
61
|
+
"workspace_id": {"readonly": True},
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
_attribute_map = {
|
|
65
|
+
"id": {"key": "id", "type": "str"},
|
|
66
|
+
"name": {"key": "name", "type": "str"},
|
|
67
|
+
"type": {"key": "type", "type": "str"},
|
|
68
|
+
#'system_data': {'key': 'systemData', 'type': 'SystemData'},
|
|
69
|
+
#'identity': {'key': 'identity', 'type': 'ManagedServiceIdentity'},
|
|
70
|
+
"kind": {"key": "kind", "type": "str"},
|
|
71
|
+
"location": {"key": "location", "type": "str"},
|
|
72
|
+
#'sku': {'key': 'sku', 'type': 'Sku'},
|
|
73
|
+
"tags": {"key": "tags", "type": "{str}"},
|
|
74
|
+
"agents_endpoint_uri": {"key": "properties.agentsEndpointUri", "type": "str"},
|
|
75
|
+
"allow_public_access_when_behind_vnet": {"key": "properties.allowPublicAccessWhenBehindVnet", "type": "bool"},
|
|
76
|
+
"allow_role_assignment_on_rg": {"key": "properties.allowRoleAssignmentOnRG", "type": "bool"},
|
|
77
|
+
"application_insights": {"key": "properties.applicationInsights", "type": "str"},
|
|
78
|
+
"associated_workspaces": {"key": "properties.associatedWorkspaces", "type": "[str]"},
|
|
79
|
+
"container_registries": {"key": "properties.containerRegistries", "type": "[str]"},
|
|
80
|
+
"container_registry": {"key": "properties.containerRegistry", "type": "str"},
|
|
81
|
+
"description": {"key": "properties.description", "type": "str"},
|
|
82
|
+
"discovery_url": {"key": "properties.discoveryUrl", "type": "str"},
|
|
83
|
+
"enable_data_isolation": {"key": "properties.enableDataIsolation", "type": "bool"},
|
|
84
|
+
"enable_service_side_cmk_encryption": {"key": "properties.enableServiceSideCMKEncryption", "type": "bool"},
|
|
85
|
+
"enable_simplified_cmk": {"key": "properties.enableSimplifiedCmk", "type": "bool"},
|
|
86
|
+
"enable_software_bill_of_materials": {"key": "properties.enableSoftwareBillOfMaterials", "type": "bool"},
|
|
87
|
+
#'encryption': {'key': 'properties.encryption', 'type': 'EncryptionProperty'},
|
|
88
|
+
"existing_workspaces": {"key": "properties.existingWorkspaces", "type": "[str]"},
|
|
89
|
+
#'feature_store_settings': {'key': 'properties.featureStoreSettings', 'type': 'FeatureStoreSettings'},
|
|
90
|
+
"friendly_name": {"key": "properties.friendlyName", "type": "str"},
|
|
91
|
+
"hbi_workspace": {"key": "properties.hbiWorkspace", "type": "bool"},
|
|
92
|
+
"hub_resource_id": {"key": "properties.hubResourceId", "type": "str"},
|
|
93
|
+
"image_build_compute": {"key": "properties.imageBuildCompute", "type": "str"},
|
|
94
|
+
"ip_allowlist": {"key": "properties.ipAllowlist", "type": "[str]"},
|
|
95
|
+
"key_vault": {"key": "properties.keyVault", "type": "str"},
|
|
96
|
+
"key_vaults": {"key": "properties.keyVaults", "type": "[str]"},
|
|
97
|
+
#'managed_network': {'key': 'properties.managedNetwork', 'type': 'ManagedNetworkSettings'},
|
|
98
|
+
"ml_flow_tracking_uri": {"key": "properties.mlFlowTrackingUri", "type": "str"},
|
|
99
|
+
#'network_acls': {'key': 'properties.networkAcls', 'type': 'NetworkAcls'},
|
|
100
|
+
#'notebook_info': {'key': 'properties.notebookInfo', 'type': 'NotebookResourceInfo'},
|
|
101
|
+
"primary_user_assigned_identity": {"key": "properties.primaryUserAssignedIdentity", "type": "str"},
|
|
102
|
+
# "private_endpoint_connections": {
|
|
103
|
+
# "key": "properties.privateEndpointConnections",
|
|
104
|
+
# "type": "[PrivateEndpointConnection]",
|
|
105
|
+
# },
|
|
106
|
+
"private_link_count": {"key": "properties.privateLinkCount", "type": "int"},
|
|
107
|
+
"provision_network_now": {"key": "properties.provisionNetworkNow", "type": "bool"},
|
|
108
|
+
"provisioning_state": {"key": "properties.provisioningState", "type": "str"},
|
|
109
|
+
#'public_network_access': {'key': 'properties.publicNetworkAccess', 'type': 'str'},
|
|
110
|
+
#'serverless_compute_settings': {'key': 'properties.serverlessComputeSettings', 'type': 'ServerlessComputeSettings'},
|
|
111
|
+
#'service_managed_resources_settings': {'key': 'properties.serviceManagedResourcesSettings', 'type': 'ServiceManagedResourcesSettings'},
|
|
112
|
+
"service_provisioned_resource_group": {"key": "properties.serviceProvisionedResourceGroup", "type": "str"},
|
|
113
|
+
#'shared_private_link_resources': {'key': 'properties.sharedPrivateLinkResources', 'type': '[SharedPrivateLinkResource]'},
|
|
114
|
+
"soft_delete_retention_in_days": {"key": "properties.softDeleteRetentionInDays", "type": "int"},
|
|
115
|
+
"storage_account": {"key": "properties.storageAccount", "type": "str"},
|
|
116
|
+
"storage_accounts": {"key": "properties.storageAccounts", "type": "[str]"},
|
|
117
|
+
"storage_hns_enabled": {"key": "properties.storageHnsEnabled", "type": "bool"},
|
|
118
|
+
#'system_datastores_auth_mode': {'key': 'properties.systemDatastoresAuthMode', 'type': 'str'},
|
|
119
|
+
"tenant_id": {"key": "properties.tenantId", "type": "str"},
|
|
120
|
+
"v1_legacy_mode": {"key": "properties.v1LegacyMode", "type": "bool"},
|
|
121
|
+
"workspace_hub_config": {"key": "properties.workspaceHubConfig", "type": "WorkspaceHubConfig"},
|
|
122
|
+
"workspace_id": {"key": "properties.workspaceId", "type": "str"},
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
def __init__(
|
|
126
|
+
self,
|
|
127
|
+
*,
|
|
128
|
+
# system_data: Optional[SystemData] = None,
|
|
129
|
+
# identity: Optional["ManagedServiceIdentity"] = None,
|
|
130
|
+
kind: Optional[str] = None,
|
|
131
|
+
location: Optional[str] = None,
|
|
132
|
+
# sku: Optional["Sku"] = None,
|
|
133
|
+
tags: Optional[Dict[str, str]] = None,
|
|
134
|
+
allow_public_access_when_behind_vnet: Optional[bool] = None,
|
|
135
|
+
allow_role_assignment_on_rg: Optional[bool] = None,
|
|
136
|
+
application_insights: Optional[str] = None,
|
|
137
|
+
associated_workspaces: Optional[List[str]] = None,
|
|
138
|
+
container_registries: Optional[List[str]] = None,
|
|
139
|
+
container_registry: Optional[str] = None,
|
|
140
|
+
description: Optional[str] = None,
|
|
141
|
+
discovery_url: Optional[str] = None,
|
|
142
|
+
enable_data_isolation: Optional[bool] = None,
|
|
143
|
+
enable_service_side_cmk_encryption: Optional[bool] = None,
|
|
144
|
+
enable_simplified_cmk: Optional[bool] = None,
|
|
145
|
+
enable_software_bill_of_materials: Optional[bool] = None,
|
|
146
|
+
# encryption: Optional["EncryptionProperty"] = None,
|
|
147
|
+
existing_workspaces: Optional[List[str]] = None,
|
|
148
|
+
# feature_store_settings: Optional["FeatureStoreSettings"] = None,
|
|
149
|
+
friendly_name: Optional[str] = None,
|
|
150
|
+
hbi_workspace: Optional[bool] = None,
|
|
151
|
+
hub_resource_id: Optional[str] = None,
|
|
152
|
+
image_build_compute: Optional[str] = None,
|
|
153
|
+
ip_allowlist: Optional[List[str]] = None,
|
|
154
|
+
key_vault: Optional[str] = None,
|
|
155
|
+
key_vaults: Optional[List[str]] = None,
|
|
156
|
+
# managed_network: Optional["ManagedNetworkSettings"] = None,
|
|
157
|
+
# network_acls: Optional["NetworkAcls"] = None,
|
|
158
|
+
primary_user_assigned_identity: Optional[str] = None,
|
|
159
|
+
provision_network_now: Optional[bool] = None,
|
|
160
|
+
# public_network_access: Optional[Union[str, "PublicNetworkAccessType"]] = None,
|
|
161
|
+
# serverless_compute_settings: Optional["ServerlessComputeSettings"] = None,
|
|
162
|
+
# service_managed_resources_settings: Optional["ServiceManagedResourcesSettings"] = None,
|
|
163
|
+
# shared_private_link_resources: Optional[List["SharedPrivateLinkResource"]] = None,
|
|
164
|
+
soft_delete_retention_in_days: Optional[int] = None,
|
|
165
|
+
storage_account: Optional[str] = None,
|
|
166
|
+
storage_accounts: Optional[List[str]] = None,
|
|
167
|
+
# system_datastores_auth_mode: Optional[Union[str, "SystemDatastoresAuthMode"]] = None,
|
|
168
|
+
v1_legacy_mode: Optional[bool] = None,
|
|
169
|
+
workspace_hub_config: Optional["WorkspaceHubConfig"] = None,
|
|
170
|
+
**kwargs
|
|
171
|
+
):
|
|
172
|
+
super(Workspace, self).__init__(**kwargs)
|
|
173
|
+
self.id: Optional[str] = None
|
|
174
|
+
self.name: Optional[str] = None
|
|
175
|
+
self.type: Optional[str] = None
|
|
176
|
+
# self.system_data = system_data
|
|
177
|
+
# self.identity = identity
|
|
178
|
+
self.kind = kind
|
|
179
|
+
self.location = location
|
|
180
|
+
# self.sku = sku
|
|
181
|
+
self.tags = tags
|
|
182
|
+
self.agents_endpoint_uri = None
|
|
183
|
+
self.allow_public_access_when_behind_vnet = allow_public_access_when_behind_vnet
|
|
184
|
+
self.allow_role_assignment_on_rg = allow_role_assignment_on_rg
|
|
185
|
+
self.application_insights = application_insights
|
|
186
|
+
self.associated_workspaces = associated_workspaces
|
|
187
|
+
self.container_registries = container_registries
|
|
188
|
+
self.container_registry = container_registry
|
|
189
|
+
self.description = description
|
|
190
|
+
self.discovery_url = discovery_url
|
|
191
|
+
self.enable_data_isolation = enable_data_isolation
|
|
192
|
+
self.enable_service_side_cmk_encryption = enable_service_side_cmk_encryption
|
|
193
|
+
self.enable_simplified_cmk = enable_simplified_cmk
|
|
194
|
+
self.enable_software_bill_of_materials = enable_software_bill_of_materials
|
|
195
|
+
# self.encryption = encryption
|
|
196
|
+
self.existing_workspaces = existing_workspaces
|
|
197
|
+
# self.feature_store_settings = feature_store_settings
|
|
198
|
+
self.friendly_name = friendly_name
|
|
199
|
+
self.hbi_workspace = hbi_workspace
|
|
200
|
+
self.hub_resource_id = hub_resource_id
|
|
201
|
+
self.image_build_compute = image_build_compute
|
|
202
|
+
self.ip_allowlist = ip_allowlist
|
|
203
|
+
self.key_vault = key_vault
|
|
204
|
+
self.key_vaults = key_vaults
|
|
205
|
+
# self.managed_network = managed_network
|
|
206
|
+
self.ml_flow_tracking_uri = None
|
|
207
|
+
# self.network_acls = network_acls
|
|
208
|
+
# self.notebook_info = None
|
|
209
|
+
self.primary_user_assigned_identity = primary_user_assigned_identity
|
|
210
|
+
# self.private_endpoint_connections = None
|
|
211
|
+
self.private_link_count = None
|
|
212
|
+
self.provision_network_now = provision_network_now
|
|
213
|
+
self.provisioning_state = None
|
|
214
|
+
# self.public_network_access = public_network_access
|
|
215
|
+
# self.serverless_compute_settings = serverless_compute_settings
|
|
216
|
+
# self.service_managed_resources_settings = service_managed_resources_settings
|
|
217
|
+
self.service_provisioned_resource_group = None
|
|
218
|
+
# self.shared_private_link_resources = shared_private_link_resources
|
|
219
|
+
self.soft_delete_retention_in_days = soft_delete_retention_in_days
|
|
220
|
+
self.storage_account = storage_account
|
|
221
|
+
self.storage_accounts = storage_accounts
|
|
222
|
+
self.storage_hns_enabled = None
|
|
223
|
+
# self.system_datastores_auth_mode = system_datastores_auth_mode
|
|
224
|
+
self.tenant_id = None
|
|
225
|
+
self.v1_legacy_mode = v1_legacy_mode
|
|
226
|
+
self.workspace_hub_config = workspace_hub_config
|
|
227
|
+
self.workspace_id = None
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
import os
|
|
5
|
+
import logging
|
|
6
|
+
import time
|
|
7
|
+
import inspect
|
|
8
|
+
from typing import cast, Optional, Union, Any
|
|
9
|
+
|
|
10
|
+
from azure.core.credentials import TokenCredential, AccessToken
|
|
11
|
+
from azure.identity import AzureCliCredential, DefaultAzureCredential, ManagedIdentityCredential
|
|
12
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
13
|
+
|
|
14
|
+
from ..simulator._model_tools._identity_manager import APITokenManager, AZURE_TOKEN_REFRESH_INTERVAL
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AzureMLTokenManager(APITokenManager):
|
|
18
|
+
"""API Token manager for Azure Management API.
|
|
19
|
+
|
|
20
|
+
:param token_scope: Token scopes for Azure endpoint
|
|
21
|
+
:type token_scope: str
|
|
22
|
+
:param logger: Logger object
|
|
23
|
+
:type logger: logging.Logger
|
|
24
|
+
:keyword kwargs: Additional keyword arguments
|
|
25
|
+
:paramtype kwargs: Dict
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
def __init__(
|
|
29
|
+
self,
|
|
30
|
+
token_scope: str,
|
|
31
|
+
logger: logging.Logger,
|
|
32
|
+
credential: Optional[TokenCredential] = None,
|
|
33
|
+
):
|
|
34
|
+
super().__init__(logger, credential=credential)
|
|
35
|
+
self.token_scope = token_scope
|
|
36
|
+
self.token_expiry_time: Optional[int] = None
|
|
37
|
+
|
|
38
|
+
def get_aad_credential(self) -> Union[DefaultAzureCredential, ManagedIdentityCredential]:
|
|
39
|
+
"""Get the Azure credentials to use for the management APIs.
|
|
40
|
+
|
|
41
|
+
:return: Azure credentials
|
|
42
|
+
:rtype: DefaultAzureCredential or ManagedIdentityCredential
|
|
43
|
+
"""
|
|
44
|
+
# Adds some of the additional types credentials that the previous Azure AI ML code used
|
|
45
|
+
# These may or may not be needed but kept here for backwards compatibility
|
|
46
|
+
|
|
47
|
+
if os.getenv("AZUREML_OBO_ENABLED"):
|
|
48
|
+
# using Azure on behalf of credentials requires the use of the azure-ai-ml package
|
|
49
|
+
try:
|
|
50
|
+
from azure.ai.ml.identity import AzureMLOnBehalfOfCredential
|
|
51
|
+
|
|
52
|
+
self.logger.debug("User identity is configured, use OBO credential.")
|
|
53
|
+
return AzureMLOnBehalfOfCredential() # type: ignore
|
|
54
|
+
except (ModuleNotFoundError, ImportError):
|
|
55
|
+
raise EvaluationException( # pylint: disable=raise-missing-from
|
|
56
|
+
message=(
|
|
57
|
+
"The required packages for OBO credentials are missing.\n"
|
|
58
|
+
'To resolve this, please install them by running "pip install azure-ai-ml".'
|
|
59
|
+
),
|
|
60
|
+
target=ErrorTarget.EVALUATE,
|
|
61
|
+
category=ErrorCategory.MISSING_PACKAGE,
|
|
62
|
+
blame=ErrorBlame.USER_ERROR,
|
|
63
|
+
)
|
|
64
|
+
elif os.environ.get("PF_USE_AZURE_CLI_CREDENTIAL", "false").lower() == "true":
|
|
65
|
+
self.logger.debug("Use azure cli credential since specified in environment variable.")
|
|
66
|
+
return AzureCliCredential() # type: ignore
|
|
67
|
+
elif os.environ.get("IS_IN_CI_PIPELINE", "false").lower() == "true":
|
|
68
|
+
# use managed identity when executing in CI pipeline.
|
|
69
|
+
self.logger.debug("Use azure cli credential since in CI pipeline.")
|
|
70
|
+
return AzureCliCredential() # type: ignore
|
|
71
|
+
else:
|
|
72
|
+
# Fall back to using the parent implementation
|
|
73
|
+
return super().get_aad_credential()
|
|
74
|
+
|
|
75
|
+
def get_token(
|
|
76
|
+
self,
|
|
77
|
+
scopes=None,
|
|
78
|
+
claims: Union[str, None] = None,
|
|
79
|
+
tenant_id: Union[str, None] = None,
|
|
80
|
+
enable_cae: bool = False,
|
|
81
|
+
**kwargs: Any
|
|
82
|
+
) -> AccessToken:
|
|
83
|
+
"""Get the API token. If the token is not available or has expired, refresh the token.
|
|
84
|
+
|
|
85
|
+
:return: API token
|
|
86
|
+
:rtype: str
|
|
87
|
+
"""
|
|
88
|
+
if self._token_needs_update():
|
|
89
|
+
credential = cast(TokenCredential, self.credential)
|
|
90
|
+
token_scope = self.token_scope
|
|
91
|
+
if scopes:
|
|
92
|
+
token_scope = scopes
|
|
93
|
+
access_token = credential.get_token(token_scope)
|
|
94
|
+
self._update_token(access_token)
|
|
95
|
+
|
|
96
|
+
return cast(AccessToken, self.token) # check for none is hidden in the _token_needs_update method
|
|
97
|
+
|
|
98
|
+
async def get_token_async(self) -> AccessToken:
|
|
99
|
+
"""Get the API token asynchronously. If the token is not available or has expired, refresh it.
|
|
100
|
+
|
|
101
|
+
:return: API token
|
|
102
|
+
:rtype: str
|
|
103
|
+
"""
|
|
104
|
+
if self._token_needs_update():
|
|
105
|
+
credential = cast(TokenCredential, self.credential)
|
|
106
|
+
get_token_method = credential.get_token(self.token_scope)
|
|
107
|
+
if inspect.isawaitable(get_token_method):
|
|
108
|
+
access_token = await get_token_method
|
|
109
|
+
else:
|
|
110
|
+
access_token = get_token_method
|
|
111
|
+
self._update_token(access_token)
|
|
112
|
+
|
|
113
|
+
return cast(AccessToken, self.token) # check for none is hidden in the _token_needs_update method
|
|
114
|
+
|
|
115
|
+
def _token_needs_update(self) -> bool:
|
|
116
|
+
current_time = time.time()
|
|
117
|
+
return (
|
|
118
|
+
self.token is None
|
|
119
|
+
or self.last_refresh_time is None
|
|
120
|
+
or self.token_expiry_time is None
|
|
121
|
+
or self.token_expiry_time - current_time < AZURE_TOKEN_REFRESH_INTERVAL
|
|
122
|
+
or current_time - self.last_refresh_time > AZURE_TOKEN_REFRESH_INTERVAL
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
def _update_token(self, access_token: AccessToken) -> None:
|
|
126
|
+
self.token = access_token
|
|
127
|
+
self.token_expiry_time = access_token.expires_on
|
|
128
|
+
self.last_refresh_time = time.time()
|
|
129
|
+
self.logger.info("Refreshed Azure management token.")
|
|
@@ -6,11 +6,19 @@
|
|
|
6
6
|
# that would have otherwise been a relative import scoped to single evaluator directories.
|
|
7
7
|
|
|
8
8
|
from . import constants
|
|
9
|
-
from .rai_service import evaluate_with_rai_service
|
|
9
|
+
from .rai_service import evaluate_with_rai_service, evaluate_with_rai_service_sync
|
|
10
10
|
from .utils import get_harm_severity_level
|
|
11
|
+
from .evaluation_onedp_client import EvaluationServiceOneDPClient
|
|
12
|
+
from .onedp.models import EvaluationUpload, EvaluationResult, RedTeamUpload, ResultType
|
|
11
13
|
|
|
12
14
|
__all__ = [
|
|
13
15
|
"get_harm_severity_level",
|
|
14
16
|
"evaluate_with_rai_service",
|
|
17
|
+
"evaluate_with_rai_service_sync",
|
|
15
18
|
"constants",
|
|
19
|
+
"EvaluationServiceOneDPClient",
|
|
20
|
+
"EvaluationResult",
|
|
21
|
+
"EvaluationUpload",
|
|
22
|
+
"RedTeamUpload",
|
|
23
|
+
"ResultType",
|
|
16
24
|
]
|
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
|
|
5
|
+
import os
|
|
5
6
|
import functools
|
|
6
7
|
import inspect
|
|
7
8
|
import logging
|
|
8
9
|
import sys
|
|
9
|
-
from typing import Callable, Type, TypeVar, Union
|
|
10
|
+
from typing import Callable, Type, TypeVar, Union, overload
|
|
10
11
|
|
|
11
|
-
from typing_extensions import ParamSpec
|
|
12
|
+
from typing_extensions import ParamSpec, TypeGuard
|
|
12
13
|
|
|
13
14
|
DOCSTRING_TEMPLATE = ".. note:: {0} {1}\n\n"
|
|
14
15
|
DOCSTRING_DEFAULT_INDENTATION = 8
|
|
@@ -22,20 +23,31 @@ EXPERIMENTAL_LINK_MESSAGE = (
|
|
|
22
23
|
_warning_cache = set()
|
|
23
24
|
module_logger = logging.getLogger(__name__)
|
|
24
25
|
|
|
25
|
-
TExperimental = TypeVar("TExperimental", bound=Union[Type, Callable])
|
|
26
26
|
P = ParamSpec("P")
|
|
27
27
|
T = TypeVar("T")
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
@overload
|
|
31
|
+
def experimental(wrapped: Type[T]) -> Type[T]: ...
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@overload
|
|
35
|
+
def experimental(wrapped: Callable[P, T]) -> Callable[P, T]: ...
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def experimental(wrapped: Union[Type[T], Callable[P, T]]) -> Union[Type[T], Callable[P, T]]:
|
|
31
39
|
"""Add experimental tag to a class or a method.
|
|
32
40
|
|
|
33
41
|
:param wrapped: Either a Class or Function to mark as experimental
|
|
34
|
-
:type wrapped:
|
|
42
|
+
:type wrapped: Union[Type[T], Callable[P, T]]
|
|
35
43
|
:return: The wrapped class or method
|
|
36
|
-
:rtype:
|
|
44
|
+
:rtype: Union[Type[T], Callable[P, T]]
|
|
37
45
|
"""
|
|
38
|
-
|
|
46
|
+
|
|
47
|
+
def is_class(t: Union[Type[T], Callable[P, T]]) -> TypeGuard[Type[T]]:
|
|
48
|
+
return isinstance(t, type)
|
|
49
|
+
|
|
50
|
+
if is_class(wrapped):
|
|
39
51
|
return _add_class_docstring(wrapped)
|
|
40
52
|
if inspect.isfunction(wrapped):
|
|
41
53
|
return _add_method_docstring(wrapped)
|
|
@@ -74,11 +86,11 @@ def _add_class_docstring(cls: Type[T]) -> Type[T]:
|
|
|
74
86
|
cls.__doc__ = _add_note_to_docstring(cls.__doc__, doc_string)
|
|
75
87
|
else:
|
|
76
88
|
cls.__doc__ = doc_string + ">"
|
|
77
|
-
cls.__init__ = _add_class_warning(cls.__init__)
|
|
89
|
+
cls.__init__ = _add_class_warning(cls.__init__) # type: ignore[method-assign]
|
|
78
90
|
return cls
|
|
79
91
|
|
|
80
92
|
|
|
81
|
-
def _add_method_docstring(func: Callable[P, T]
|
|
93
|
+
def _add_method_docstring(func: Callable[P, T]) -> Callable[P, T]:
|
|
82
94
|
"""Add experimental tag to the method doc string.
|
|
83
95
|
|
|
84
96
|
:param func: The function to update
|
|
@@ -138,6 +150,9 @@ def _get_indentation_size(doc_string: str) -> int:
|
|
|
138
150
|
def _should_skip_warning():
|
|
139
151
|
skip_warning_msg = False
|
|
140
152
|
|
|
153
|
+
if os.getenv("AI_EVALS_DISABLE_EXPERIMENTAL_WARNING", "false").lower() == "true":
|
|
154
|
+
skip_warning_msg = True
|
|
155
|
+
|
|
141
156
|
# Cases where we want to suppress the warning:
|
|
142
157
|
# 1. When converting from REST object to SDK object
|
|
143
158
|
for frame in inspect.stack():
|
|
@@ -2,6 +2,27 @@
|
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
from enum import Enum
|
|
5
|
+
from typing import Dict, Any, Optional
|
|
6
|
+
|
|
7
|
+
from azure.core import CaseInsensitiveEnumMeta
|
|
8
|
+
|
|
9
|
+
PROMPT_BASED_REASON_EVALUATORS = [
|
|
10
|
+
"coherence",
|
|
11
|
+
"relevance",
|
|
12
|
+
"retrieval",
|
|
13
|
+
"groundedness",
|
|
14
|
+
"fluency",
|
|
15
|
+
"intent_resolution",
|
|
16
|
+
"tool_call_accurate",
|
|
17
|
+
"response_completeness",
|
|
18
|
+
"task_adherence",
|
|
19
|
+
"tool_selection",
|
|
20
|
+
"tool_output_utilization",
|
|
21
|
+
"task_completion",
|
|
22
|
+
"tool_input_accuracy",
|
|
23
|
+
"tool_success",
|
|
24
|
+
"tool_call_accuracy",
|
|
25
|
+
]
|
|
5
26
|
|
|
6
27
|
|
|
7
28
|
class CommonConstants:
|
|
@@ -27,12 +48,31 @@ class HarmSeverityLevel(Enum):
|
|
|
27
48
|
High = "High"
|
|
28
49
|
|
|
29
50
|
|
|
51
|
+
class EvaluatorScoringPattern(Enum):
|
|
52
|
+
"""Defines different scoring patterns used by evaluators."""
|
|
53
|
+
|
|
54
|
+
# Binary patterns
|
|
55
|
+
BINARY_SAFE_UNSAFE = "binary_safe_unsafe" # Output: safe/unsafe
|
|
56
|
+
BINARY_TRUE_FALSE = "binary_true_false" # Output: true/false (as string)
|
|
57
|
+
|
|
58
|
+
# Numeric scale patterns
|
|
59
|
+
SCALE_0_7 = "scale_0_7" # 0-7 scale (content harm evaluators)
|
|
60
|
+
SCALE_1_3 = "scale_1_3" # 1-3 scale (task adherence)
|
|
61
|
+
SCALE_1_5 = "scale_1_5" # 1-5 scale (quality evaluators)
|
|
62
|
+
|
|
63
|
+
|
|
30
64
|
class Tasks:
|
|
31
65
|
"""Defines types of annotation tasks supported by RAI Service."""
|
|
32
66
|
|
|
33
67
|
CONTENT_HARM = "content harm"
|
|
34
68
|
PROTECTED_MATERIAL = "protected material"
|
|
35
69
|
XPIA = "xpia"
|
|
70
|
+
GROUNDEDNESS = "groundedness"
|
|
71
|
+
CODE_VULNERABILITY = "code vulnerability"
|
|
72
|
+
UNGROUNDED_ATTRIBUTES = "inference sensitive attributes"
|
|
73
|
+
SENSITIVE_DATA_LEAKAGE = "sensitive_data_leakage"
|
|
74
|
+
TASK_ADHERENCE = "task_adherence"
|
|
75
|
+
PROHIBITED_ACTIONS = "prohibited_actions"
|
|
36
76
|
|
|
37
77
|
|
|
38
78
|
class _InternalAnnotationTasks:
|
|
@@ -43,7 +83,7 @@ class _InternalAnnotationTasks:
|
|
|
43
83
|
ECI = "eci"
|
|
44
84
|
|
|
45
85
|
|
|
46
|
-
class EvaluationMetrics:
|
|
86
|
+
class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
47
87
|
"""Evaluation metrics to aid the RAI service in determining what
|
|
48
88
|
metrics to request, and how to present them back to the user."""
|
|
49
89
|
|
|
@@ -54,12 +94,101 @@ class EvaluationMetrics:
|
|
|
54
94
|
SEXUAL = "sexual"
|
|
55
95
|
PROTECTED_MATERIAL = "protected_material"
|
|
56
96
|
XPIA = "xpia"
|
|
97
|
+
GROUNDEDNESS = "generic_groundedness"
|
|
98
|
+
CODE_VULNERABILITY = "code_vulnerability"
|
|
99
|
+
UNGROUNDED_ATTRIBUTES = "ungrounded_attributes"
|
|
100
|
+
SENSITIVE_DATA_LEAKAGE = "sensitive_data_leakage"
|
|
101
|
+
TASK_ADHERENCE = "task_adherence"
|
|
102
|
+
PROHIBITED_ACTIONS = "prohibited_actions"
|
|
57
103
|
|
|
58
104
|
|
|
59
|
-
class _InternalEvaluationMetrics:
|
|
105
|
+
class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
60
106
|
"""Evaluation metrics that are not publicly supported.
|
|
61
107
|
These metrics are experimental and subject to potential change or migration to the main
|
|
62
108
|
enum over time.
|
|
63
109
|
"""
|
|
64
110
|
|
|
65
111
|
ECI = "eci"
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# Mapping of evaluation metrics to their scoring patterns
|
|
115
|
+
EVALUATOR_SCORING_PATTERNS: Dict[str, EvaluatorScoringPattern] = {
|
|
116
|
+
# Content harm evaluators (0-7 scale)
|
|
117
|
+
"hate_fairness": EvaluatorScoringPattern.SCALE_0_7,
|
|
118
|
+
"hate_unfairness": EvaluatorScoringPattern.SCALE_0_7,
|
|
119
|
+
"violence": EvaluatorScoringPattern.SCALE_0_7,
|
|
120
|
+
"self_harm": EvaluatorScoringPattern.SCALE_0_7,
|
|
121
|
+
"sexual": EvaluatorScoringPattern.SCALE_0_7,
|
|
122
|
+
# Binary evaluators (safe/unsafe)
|
|
123
|
+
"prohibited_actions": EvaluatorScoringPattern.BINARY_SAFE_UNSAFE,
|
|
124
|
+
# Binary evaluators (true/false)
|
|
125
|
+
"sensitive_data_leakage": EvaluatorScoringPattern.BINARY_TRUE_FALSE,
|
|
126
|
+
"protected_material": EvaluatorScoringPattern.BINARY_TRUE_FALSE,
|
|
127
|
+
"task_adherence": EvaluatorScoringPattern.BINARY_TRUE_FALSE,
|
|
128
|
+
# 1-5 scale evaluators (quality metrics)
|
|
129
|
+
"coherence": EvaluatorScoringPattern.SCALE_1_5,
|
|
130
|
+
"relevance": EvaluatorScoringPattern.SCALE_1_5,
|
|
131
|
+
"fluency": EvaluatorScoringPattern.SCALE_1_5,
|
|
132
|
+
"groundedness": EvaluatorScoringPattern.SCALE_1_5,
|
|
133
|
+
"retrieval": EvaluatorScoringPattern.SCALE_1_5,
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Configuration for each scoring pattern
|
|
138
|
+
SCORING_PATTERN_CONFIG: Dict[EvaluatorScoringPattern, Dict[str, Any]] = {
|
|
139
|
+
EvaluatorScoringPattern.SCALE_0_7: {
|
|
140
|
+
"min_score": 0,
|
|
141
|
+
"max_score": 7,
|
|
142
|
+
"default_threshold": 3, # Scores >= 4 are considered "unsafe" (Medium or High)
|
|
143
|
+
"lower_is_better": True, # Lower scores indicate safer responses
|
|
144
|
+
"severity_mapping": {
|
|
145
|
+
HarmSeverityLevel.VeryLow: [0, 1],
|
|
146
|
+
HarmSeverityLevel.Low: [2, 3],
|
|
147
|
+
HarmSeverityLevel.Medium: [4, 5],
|
|
148
|
+
HarmSeverityLevel.High: [6, 7],
|
|
149
|
+
},
|
|
150
|
+
},
|
|
151
|
+
EvaluatorScoringPattern.SCALE_1_3: {
|
|
152
|
+
"min_score": 1,
|
|
153
|
+
"max_score": 3,
|
|
154
|
+
"default_threshold": 1, # Score of 1 indicates critical failure (attack success)
|
|
155
|
+
"lower_is_better": True, # Lower scores indicate worse performance
|
|
156
|
+
"severity_mapping": {
|
|
157
|
+
HarmSeverityLevel.High: [1], # Critical Failure
|
|
158
|
+
HarmSeverityLevel.Medium: [2], # Partial Adherence
|
|
159
|
+
HarmSeverityLevel.VeryLow: [3], # Strong Adherence
|
|
160
|
+
},
|
|
161
|
+
},
|
|
162
|
+
EvaluatorScoringPattern.SCALE_1_5: {
|
|
163
|
+
"min_score": 1,
|
|
164
|
+
"max_score": 5,
|
|
165
|
+
"default_threshold": 3, # Scores <= 2 are considered problematic
|
|
166
|
+
"lower_is_better": True, # Lower scores indicate worse quality
|
|
167
|
+
"severity_mapping": {
|
|
168
|
+
HarmSeverityLevel.High: [1],
|
|
169
|
+
HarmSeverityLevel.Medium: [2],
|
|
170
|
+
HarmSeverityLevel.Low: [3],
|
|
171
|
+
HarmSeverityLevel.VeryLow: [4, 5],
|
|
172
|
+
},
|
|
173
|
+
},
|
|
174
|
+
EvaluatorScoringPattern.BINARY_SAFE_UNSAFE: {
|
|
175
|
+
"min_score": 0,
|
|
176
|
+
"max_score": 1,
|
|
177
|
+
"default_threshold": 0, # 0=safe, 1=unsafe
|
|
178
|
+
"lower_is_better": True,
|
|
179
|
+
"severity_mapping": {
|
|
180
|
+
HarmSeverityLevel.VeryLow: [0], # safe
|
|
181
|
+
HarmSeverityLevel.High: [1], # unsafe
|
|
182
|
+
},
|
|
183
|
+
},
|
|
184
|
+
EvaluatorScoringPattern.BINARY_TRUE_FALSE: {
|
|
185
|
+
"min_score": 0,
|
|
186
|
+
"max_score": 1,
|
|
187
|
+
"default_threshold": 0, # 0=true (safe), 1=false (unsafe)
|
|
188
|
+
"lower_is_better": True,
|
|
189
|
+
"severity_mapping": {
|
|
190
|
+
HarmSeverityLevel.VeryLow: [0], # true/safe
|
|
191
|
+
HarmSeverityLevel.High: [1], # false/unsafe
|
|
192
|
+
},
|
|
193
|
+
},
|
|
194
|
+
}
|