azure-ai-evaluation 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +43 -1
- azure/ai/evaluation/_azure/_models.py +6 -6
- azure/ai/evaluation/_common/constants.py +6 -2
- azure/ai/evaluation/_common/rai_service.py +38 -4
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1225 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +22 -2
- azure/ai/evaluation/_constants.py +7 -0
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +804 -0
- azure/ai/evaluation/_converters/_models.py +302 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -3
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +104 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +1 -1
- azure/ai/evaluation/_evaluate/_evaluate.py +31 -2
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +23 -3
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +120 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +21 -2
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +43 -3
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +3 -1
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +43 -4
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +16 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +42 -5
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +15 -0
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +15 -0
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +15 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +15 -0
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +28 -4
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +21 -2
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +26 -3
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +21 -3
- azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +152 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +161 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +26 -3
- azure/ai/evaluation/_evaluators/_qa/_qa.py +51 -7
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +26 -2
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +157 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +99 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +21 -2
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +113 -4
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +23 -3
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +24 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +148 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +117 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +292 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +71 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +103 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +2 -0
- azure/ai/evaluation/_exceptions.py +5 -0
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +45 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +368 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +23 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +99 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +121 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +217 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +105 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +82 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +182 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +59 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +313 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +545 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_red_team/__init__.py +3 -0
- azure/ai/evaluation/_red_team/_attack_objective_generator.py +192 -0
- azure/ai/evaluation/_red_team/_attack_strategy.py +42 -0
- azure/ai/evaluation/_red_team/_callback_chat_target.py +74 -0
- azure/ai/evaluation/_red_team/_default_converter.py +21 -0
- azure/ai/evaluation/_red_team/_red_team.py +1858 -0
- azure/ai/evaluation/_red_team/_red_team_result.py +246 -0
- azure/ai/evaluation/_red_team/_utils/__init__.py +3 -0
- azure/ai/evaluation/_red_team/_utils/constants.py +64 -0
- azure/ai/evaluation/_red_team/_utils/formatting_utils.py +164 -0
- azure/ai/evaluation/_red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/_red_team/_utils/strategy_utils.py +188 -0
- azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +251 -150
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +3 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +54 -27
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +145 -0
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +71 -1
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/METADATA +69 -15
- azure_ai_evaluation-1.4.0.dist-info/RECORD +197 -0
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.3.0.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.4.0.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/__init__.py
CHANGED
|
@@ -17,14 +17,20 @@ from ._evaluators._fluency import FluencyEvaluator
|
|
|
17
17
|
from ._evaluators._gleu import GleuScoreEvaluator
|
|
18
18
|
from ._evaluators._groundedness import GroundednessEvaluator
|
|
19
19
|
from ._evaluators._service_groundedness import GroundednessProEvaluator
|
|
20
|
+
from ._evaluators._intent_resolution import IntentResolutionEvaluator
|
|
20
21
|
from ._evaluators._meteor import MeteorScoreEvaluator
|
|
21
22
|
from ._evaluators._protected_material import ProtectedMaterialEvaluator
|
|
22
23
|
from ._evaluators._qa import QAEvaluator
|
|
24
|
+
from ._evaluators._response_completeness import ResponseCompletenessEvaluator
|
|
25
|
+
from ._evaluators._task_adherence import TaskAdherenceEvaluator
|
|
23
26
|
from ._evaluators._relevance import RelevanceEvaluator
|
|
24
27
|
from ._evaluators._retrieval import RetrievalEvaluator
|
|
25
28
|
from ._evaluators._rouge import RougeScoreEvaluator, RougeType
|
|
26
29
|
from ._evaluators._similarity import SimilarityEvaluator
|
|
27
30
|
from ._evaluators._xpia import IndirectAttackEvaluator
|
|
31
|
+
from ._evaluators._code_vulnerability import CodeVulnerabilityEvaluator
|
|
32
|
+
from ._evaluators._ungrounded_attributes import UngroundedAttributesEvaluator
|
|
33
|
+
from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator
|
|
28
34
|
from ._model_configurations import (
|
|
29
35
|
AzureAIProject,
|
|
30
36
|
AzureOpenAIModelConfiguration,
|
|
@@ -35,6 +41,34 @@ from ._model_configurations import (
|
|
|
35
41
|
OpenAIModelConfiguration,
|
|
36
42
|
)
|
|
37
43
|
|
|
44
|
+
_patch_all = []
|
|
45
|
+
|
|
46
|
+
# The converter from the AI service to the evaluator schema requires a dependency on
|
|
47
|
+
# ai.projects, but we also don't want to force users installing ai.evaluations to pull
|
|
48
|
+
# in ai.projects. So we only import it if it's available and the user has ai.projects.
|
|
49
|
+
try:
|
|
50
|
+
from ._converters._ai_services import AIAgentConverter
|
|
51
|
+
_patch_all.append("AIAgentConverter")
|
|
52
|
+
except ImportError:
|
|
53
|
+
print("[INFO] Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`.")
|
|
54
|
+
|
|
55
|
+
# RedTeam requires a dependency on pyrit, but python 3.9 is not supported by pyrit.
|
|
56
|
+
# So we only import it if it's available and the user has pyrit.
|
|
57
|
+
try:
|
|
58
|
+
from ._red_team._red_team import RedTeam
|
|
59
|
+
from ._red_team._attack_strategy import AttackStrategy
|
|
60
|
+
from ._red_team._attack_objective_generator import RiskCategory
|
|
61
|
+
from ._red_team._red_team_result import RedTeamOutput
|
|
62
|
+
_patch_all.extend([
|
|
63
|
+
"RedTeam",
|
|
64
|
+
"RedTeamOutput",
|
|
65
|
+
"AttackStrategy",
|
|
66
|
+
"RiskCategory",
|
|
67
|
+
])
|
|
68
|
+
except ImportError:
|
|
69
|
+
print("[INFO] Could not import RedTeam. Please install the dependency with `pip install azure-ai-evaluation[redteam]`.")
|
|
70
|
+
|
|
71
|
+
|
|
38
72
|
__all__ = [
|
|
39
73
|
"evaluate",
|
|
40
74
|
"CoherenceEvaluator",
|
|
@@ -42,6 +76,9 @@ __all__ = [
|
|
|
42
76
|
"FluencyEvaluator",
|
|
43
77
|
"GroundednessEvaluator",
|
|
44
78
|
"GroundednessProEvaluator",
|
|
79
|
+
"ResponseCompletenessEvaluator",
|
|
80
|
+
"TaskAdherenceEvaluator",
|
|
81
|
+
"IntentResolutionEvaluator",
|
|
45
82
|
"RelevanceEvaluator",
|
|
46
83
|
"SimilarityEvaluator",
|
|
47
84
|
"QAEvaluator",
|
|
@@ -64,5 +101,10 @@ __all__ = [
|
|
|
64
101
|
"EvaluatorConfig",
|
|
65
102
|
"Conversation",
|
|
66
103
|
"Message",
|
|
67
|
-
"EvaluationResult"
|
|
104
|
+
"EvaluationResult",
|
|
105
|
+
"CodeVulnerabilityEvaluator",
|
|
106
|
+
"UngroundedAttributesEvaluator",
|
|
107
|
+
"ToolCallAccuracyEvaluator",
|
|
68
108
|
]
|
|
109
|
+
|
|
110
|
+
__all__.extend([p for p in _patch_all if p not in __all__])
|
|
azure/ai/evaluation/_azure/_models.py
CHANGED
|
@@ -52,7 +52,7 @@ class Workspace(Model):
|
|
|
52
52
|
"agents_endpoint_uri": {"readonly": True},
|
|
53
53
|
"ml_flow_tracking_uri": {"readonly": True},
|
|
54
54
|
#'notebook_info': {'readonly': True},
|
|
55
|
-
"private_endpoint_connections": {"readonly": True},
|
|
55
|
+
# "private_endpoint_connections": {"readonly": True},
|
|
56
56
|
#'private_link_count': {'readonly': True},
|
|
57
57
|
"provisioning_state": {"readonly": True},
|
|
58
58
|
"service_provisioned_resource_group": {"readonly": True},
|
|
@@ -99,10 +99,10 @@ class Workspace(Model):
|
|
|
99
99
|
#'network_acls': {'key': 'properties.networkAcls', 'type': 'NetworkAcls'},
|
|
100
100
|
#'notebook_info': {'key': 'properties.notebookInfo', 'type': 'NotebookResourceInfo'},
|
|
101
101
|
"primary_user_assigned_identity": {"key": "properties.primaryUserAssignedIdentity", "type": "str"},
|
|
102
|
-
"private_endpoint_connections": {
|
|
103
|
-
"key": "properties.privateEndpointConnections",
|
|
104
|
-
"type": "[PrivateEndpointConnection]",
|
|
105
|
-
},
|
|
102
|
+
# "private_endpoint_connections": {
|
|
103
|
+
# "key": "properties.privateEndpointConnections",
|
|
104
|
+
# "type": "[PrivateEndpointConnection]",
|
|
105
|
+
# },
|
|
106
106
|
"private_link_count": {"key": "properties.privateLinkCount", "type": "int"},
|
|
107
107
|
"provision_network_now": {"key": "properties.provisionNetworkNow", "type": "bool"},
|
|
108
108
|
"provisioning_state": {"key": "properties.provisioningState", "type": "str"},
|
|
@@ -207,7 +207,7 @@ class Workspace(Model):
|
|
|
207
207
|
# self.network_acls = network_acls
|
|
208
208
|
# self.notebook_info = None
|
|
209
209
|
self.primary_user_assigned_identity = primary_user_assigned_identity
|
|
210
|
-
self.private_endpoint_connections = None
|
|
210
|
+
# self.private_endpoint_connections = None
|
|
211
211
|
self.private_link_count = None
|
|
212
212
|
self.provision_network_now = provision_network_now
|
|
213
213
|
self.provisioning_state = None
|
|
azure/ai/evaluation/_common/constants.py
CHANGED
|
@@ -5,8 +5,8 @@ from enum import Enum
|
|
|
5
5
|
|
|
6
6
|
from azure.core import CaseInsensitiveEnumMeta
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
PROMPT_BASED_REASON_EVALUATORS = ["coherence", "relevance", "retrieval", "groundedness", "fluency", "intent_resolution",
|
|
9
|
+
"tool_call_accurate", "response_completeness", "task_adherence"]
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class CommonConstants:
|
|
@@ -39,6 +39,8 @@ class Tasks:
|
|
|
39
39
|
PROTECTED_MATERIAL = "protected material"
|
|
40
40
|
XPIA = "xpia"
|
|
41
41
|
GROUNDEDNESS = "groundedness"
|
|
42
|
+
CODE_VULNERABILITY = "code vulnerability"
|
|
43
|
+
UNGROUNDED_ATTRIBUTES = "inference sensitive attributes"
|
|
42
44
|
|
|
43
45
|
|
|
44
46
|
class _InternalAnnotationTasks:
|
|
@@ -61,6 +63,8 @@ class EvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
|
61
63
|
PROTECTED_MATERIAL = "protected_material"
|
|
62
64
|
XPIA = "xpia"
|
|
63
65
|
GROUNDEDNESS = "generic_groundedness"
|
|
66
|
+
CODE_VULNERABILITY = "code_vulnerability"
|
|
67
|
+
UNGROUNDED_ATTRIBUTES = "ungrounded_attributes"
|
|
64
68
|
|
|
65
69
|
|
|
66
70
|
class _InternalEvaluationMetrics(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
azure/ai/evaluation/_common/rai_service.py
CHANGED
|
@@ -42,6 +42,7 @@ USER_TEXT_TEMPLATE_DICT: Dict[str, Template] = {
|
|
|
42
42
|
"DEFAULT": Template("<Human>{$query}</><System>{$response}</>"),
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
+
INFERENCE_OF_SENSITIVE_ATTRIBUTES = "inference_sensitive_attributes"
|
|
45
46
|
|
|
46
47
|
def get_formatted_template(data: dict, annotation_task: str) -> str:
|
|
47
48
|
"""Given the task and input data, produce a formatted string that will serve as the main
|
|
@@ -64,6 +65,19 @@ def get_formatted_template(data: dict, annotation_task: str) -> str:
|
|
|
64
65
|
"context": data.get("context", ""),
|
|
65
66
|
}
|
|
66
67
|
return json.dumps(as_dict)
|
|
68
|
+
if annotation_task == Tasks.CODE_VULNERABILITY:
|
|
69
|
+
as_dict = {
|
|
70
|
+
"context": data.get("query", ""),
|
|
71
|
+
"completion": data.get("response", "")
|
|
72
|
+
}
|
|
73
|
+
return json.dumps(as_dict)
|
|
74
|
+
if annotation_task == Tasks.UNGROUNDED_ATTRIBUTES:
|
|
75
|
+
as_dict = {
|
|
76
|
+
"query": data.get("query", ""),
|
|
77
|
+
"response": data.get("response", ""),
|
|
78
|
+
"context": data.get("context", "")
|
|
79
|
+
}
|
|
80
|
+
return json.dumps(as_dict)
|
|
67
81
|
as_dict = {
|
|
68
82
|
"query": html.escape(data.get("query", "")),
|
|
69
83
|
"response": html.escape(data.get("response", "")),
|
|
@@ -160,6 +174,8 @@ def generate_payload(normalized_user_text: str, metric: str, annotation_task: st
|
|
|
160
174
|
task = annotation_task
|
|
161
175
|
if metric == EvaluationMetrics.PROTECTED_MATERIAL:
|
|
162
176
|
include_metric = False
|
|
177
|
+
elif metric == EvaluationMetrics.UNGROUNDED_ATTRIBUTES:
|
|
178
|
+
include_metric = False
|
|
163
179
|
elif metric == _InternalEvaluationMetrics.ECI:
|
|
164
180
|
include_metric = False
|
|
165
181
|
elif metric == EvaluationMetrics.XPIA:
|
|
@@ -251,7 +267,6 @@ async def fetch_result(operation_id: str, rai_svc_url: str, credential: TokenCre
|
|
|
251
267
|
sleep_time = RAIService.SLEEP_TIME**request_count
|
|
252
268
|
await asyncio.sleep(sleep_time)
|
|
253
269
|
|
|
254
|
-
|
|
255
270
|
def parse_response( # pylint: disable=too-many-branches,too-many-statements
|
|
256
271
|
batch_response: List[Dict], metric_name: str, metric_display_name: Optional[str] = None
|
|
257
272
|
) -> Dict[str, Union[str, float]]:
|
|
@@ -274,10 +289,16 @@ def parse_response( # pylint: disable=too-many-branches,too-many-statements
|
|
|
274
289
|
EvaluationMetrics.PROTECTED_MATERIAL,
|
|
275
290
|
_InternalEvaluationMetrics.ECI,
|
|
276
291
|
EvaluationMetrics.XPIA,
|
|
292
|
+
EvaluationMetrics.CODE_VULNERABILITY,
|
|
293
|
+
EvaluationMetrics.UNGROUNDED_ATTRIBUTES,
|
|
277
294
|
}:
|
|
278
295
|
result = {}
|
|
279
296
|
if not batch_response or len(batch_response[0]) == 0:
|
|
280
297
|
return {}
|
|
298
|
+
if metric_name == EvaluationMetrics.UNGROUNDED_ATTRIBUTES and INFERENCE_OF_SENSITIVE_ATTRIBUTES in batch_response[0]:
|
|
299
|
+
batch_response[0] = {
|
|
300
|
+
EvaluationMetrics.UNGROUNDED_ATTRIBUTES: batch_response[0][INFERENCE_OF_SENSITIVE_ATTRIBUTES]
|
|
301
|
+
}
|
|
281
302
|
if metric_name == EvaluationMetrics.PROTECTED_MATERIAL and metric_name not in batch_response[0]:
|
|
282
303
|
pm_metric_names = {"artwork", "fictional_characters", "logos_and_brands"}
|
|
283
304
|
for pm_metric_name in pm_metric_names:
|
|
@@ -313,6 +334,13 @@ def parse_response( # pylint: disable=too-many-branches,too-many-statements
|
|
|
313
334
|
result[metric_display_name + "_information_gathering"] = (
|
|
314
335
|
parsed_response["information_gathering"] if "information_gathering" in parsed_response else math.nan
|
|
315
336
|
)
|
|
337
|
+
if metric_name == EvaluationMetrics.CODE_VULNERABILITY or metric_name == EvaluationMetrics.UNGROUNDED_ATTRIBUTES:
|
|
338
|
+
# Add all attributes under the details.
|
|
339
|
+
details = {}
|
|
340
|
+
for key, value in parsed_response.items():
|
|
341
|
+
if key not in {"label", "reasoning", "version"}:
|
|
342
|
+
details[key.replace("-", "_")] = value
|
|
343
|
+
result[metric_display_name + "_details"] = details
|
|
316
344
|
return result
|
|
317
345
|
return _parse_content_harm_response(batch_response, metric_name, metric_display_name)
|
|
318
346
|
|
|
@@ -359,7 +387,14 @@ def _parse_content_harm_response(
|
|
|
359
387
|
|
|
360
388
|
# get content harm metric_value
|
|
361
389
|
if "label" in harm_response:
|
|
362
|
-
|
|
390
|
+
try:
|
|
391
|
+
# Handle "n/a" or other non-numeric values
|
|
392
|
+
if isinstance(harm_response["label"], str) and harm_response["label"].strip().lower() == "n/a":
|
|
393
|
+
metric_value = math.nan
|
|
394
|
+
else:
|
|
395
|
+
metric_value = float(harm_response["label"])
|
|
396
|
+
except (ValueError, TypeError):
|
|
397
|
+
metric_value = math.nan
|
|
363
398
|
elif "valid" in harm_response:
|
|
364
399
|
metric_value = 0 if harm_response["valid"] else math.nan
|
|
365
400
|
else:
|
|
@@ -390,8 +425,7 @@ def _parse_content_harm_response(
|
|
|
390
425
|
reason = ""
|
|
391
426
|
|
|
392
427
|
harm_score = metric_value
|
|
393
|
-
|
|
394
|
-
return result
|
|
428
|
+
# We've already handled the "n/a" case by converting to math.nan
|
|
395
429
|
if not math.isnan(metric_value):
|
|
396
430
|
# int(math.nan) causes a value error, and math.nan is already handled
|
|
397
431
|
# by get_harm_severity_level
|
|
azure/ai/evaluation/_common/raiclient/__init__.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# --------------------------------------------------------------------------
|
|
3
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
4
|
+
# Licensed under the MIT License. See License.txt in the project root for license information.
|
|
5
|
+
# Code generated by Microsoft (R) Python Code Generator.
|
|
6
|
+
# Changes may cause incorrect behavior and will be lost if the code is regenerated.
|
|
7
|
+
# --------------------------------------------------------------------------
|
|
8
|
+
# pylint: disable=wrong-import-position
|
|
9
|
+
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from ._patch import * # pylint: disable=unused-wildcard-import
|
|
14
|
+
|
|
15
|
+
from ._client import MachineLearningServicesClient # type: ignore
|
|
16
|
+
from ._version import VERSION
|
|
17
|
+
|
|
18
|
+
__version__ = VERSION
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
from ._patch import __all__ as _patch_all
|
|
22
|
+
from ._patch import *
|
|
23
|
+
except ImportError:
|
|
24
|
+
_patch_all = []
|
|
25
|
+
from ._patch import patch_sdk as _patch_sdk
|
|
26
|
+
|
|
27
|
+
# Export GeneratedRAIClient as alias of MachineLearningServicesClient for backward compatibility
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"MachineLearningServicesClient",
|
|
31
|
+
]
|
|
32
|
+
__all__.extend([p for p in _patch_all if p not in __all__]) # pyright: ignore
|
|
33
|
+
|
|
34
|
+
_patch_sdk()
|
|
azure/ai/evaluation/_common/raiclient/_client.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# pylint: disable=line-too-long,useless-suppression
|
|
2
|
+
# coding=utf-8
|
|
3
|
+
# --------------------------------------------------------------------------
|
|
4
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
5
|
+
# Licensed under the MIT License. See License.txt in the project root for license information.
|
|
6
|
+
# Code generated by Microsoft (R) Python Code Generator.
|
|
7
|
+
# Changes may cause incorrect behavior and will be lost if the code is regenerated.
|
|
8
|
+
# --------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
from copy import deepcopy
|
|
11
|
+
from typing import Any, TYPE_CHECKING
|
|
12
|
+
from typing_extensions import Self
|
|
13
|
+
|
|
14
|
+
from azure.core import PipelineClient
|
|
15
|
+
from azure.core.pipeline import policies
|
|
16
|
+
from azure.core.rest import HttpRequest, HttpResponse
|
|
17
|
+
|
|
18
|
+
from ._configuration import MachineLearningServicesClientConfiguration
|
|
19
|
+
from ._serialization import Deserializer, Serializer
|
|
20
|
+
from .operations import RAISvcOperations
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from azure.core.credentials import TokenCredential
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MachineLearningServicesClient:
|
|
27
|
+
"""MachineLearningServicesClient.
|
|
28
|
+
|
|
29
|
+
:ivar rai_svc: RAISvcOperations operations
|
|
30
|
+
:vartype rai_svc: raiclient.operations.RAISvcOperations
|
|
31
|
+
:param endpoint: Supported Azure-AI endpoints. Required.
|
|
32
|
+
:type endpoint: str
|
|
33
|
+
:param subscription_id: The ID of the target subscription. Required.
|
|
34
|
+
:type subscription_id: str
|
|
35
|
+
:param resource_group_name: The name of the Resource Group. Required.
|
|
36
|
+
:type resource_group_name: str
|
|
37
|
+
:param workspace_name: The name of the AzureML workspace or AI project. Required.
|
|
38
|
+
:type workspace_name: str
|
|
39
|
+
:param credential: Credential used to authenticate requests to the service. Required.
|
|
40
|
+
:type credential: ~azure.core.credentials.TokenCredential
|
|
41
|
+
:keyword api_version: The API version to use for this operation. Default value is
|
|
42
|
+
"2022-11-01-preview". Note that overriding this default value may result in unsupported
|
|
43
|
+
behavior.
|
|
44
|
+
:paramtype api_version: str
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
def __init__(
|
|
48
|
+
self,
|
|
49
|
+
endpoint: str,
|
|
50
|
+
subscription_id: str,
|
|
51
|
+
resource_group_name: str,
|
|
52
|
+
workspace_name: str,
|
|
53
|
+
credential: "TokenCredential",
|
|
54
|
+
**kwargs: Any
|
|
55
|
+
) -> None:
|
|
56
|
+
_endpoint = "{endpoint}/raisvc/v1.0/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/{workspaceName}"
|
|
57
|
+
self._config = MachineLearningServicesClientConfiguration(
|
|
58
|
+
endpoint=endpoint,
|
|
59
|
+
subscription_id=subscription_id,
|
|
60
|
+
resource_group_name=resource_group_name,
|
|
61
|
+
workspace_name=workspace_name,
|
|
62
|
+
credential=credential,
|
|
63
|
+
**kwargs
|
|
64
|
+
)
|
|
65
|
+
_policies = kwargs.pop("policies", None)
|
|
66
|
+
if _policies is None:
|
|
67
|
+
_policies = [
|
|
68
|
+
policies.RequestIdPolicy(**kwargs),
|
|
69
|
+
self._config.headers_policy,
|
|
70
|
+
self._config.user_agent_policy,
|
|
71
|
+
self._config.proxy_policy,
|
|
72
|
+
policies.ContentDecodePolicy(**kwargs),
|
|
73
|
+
self._config.redirect_policy,
|
|
74
|
+
self._config.retry_policy,
|
|
75
|
+
self._config.authentication_policy,
|
|
76
|
+
self._config.custom_hook_policy,
|
|
77
|
+
self._config.logging_policy,
|
|
78
|
+
policies.DistributedTracingPolicy(**kwargs),
|
|
79
|
+
policies.SensitiveHeaderCleanupPolicy(**kwargs) if self._config.redirect_policy else None,
|
|
80
|
+
self._config.http_logging_policy,
|
|
81
|
+
]
|
|
82
|
+
self._client: PipelineClient = PipelineClient(base_url=_endpoint, policies=_policies, **kwargs)
|
|
83
|
+
|
|
84
|
+
self._serialize = Serializer()
|
|
85
|
+
self._deserialize = Deserializer()
|
|
86
|
+
self._serialize.client_side_validation = False
|
|
87
|
+
self.rai_svc = RAISvcOperations(self._client, self._config, self._serialize, self._deserialize)
|
|
88
|
+
|
|
89
|
+
def send_request(self, request: HttpRequest, *, stream: bool = False, **kwargs: Any) -> HttpResponse:
|
|
90
|
+
"""Runs the network request through the client's chained policies.
|
|
91
|
+
|
|
92
|
+
>>> from azure.core.rest import HttpRequest
|
|
93
|
+
>>> request = HttpRequest("GET", "https://www.example.org/")
|
|
94
|
+
<HttpRequest [GET], url: 'https://www.example.org/'>
|
|
95
|
+
>>> response = client.send_request(request)
|
|
96
|
+
<HttpResponse: 200 OK>
|
|
97
|
+
|
|
98
|
+
For more information on this code flow, see https://aka.ms/azsdk/dpcodegen/python/send_request
|
|
99
|
+
|
|
100
|
+
:param request: The network request you want to make. Required.
|
|
101
|
+
:type request: ~azure.core.rest.HttpRequest
|
|
102
|
+
:keyword bool stream: Whether the response payload will be streamed. Defaults to False.
|
|
103
|
+
:return: The response of your network call. Does not do error handling on your response.
|
|
104
|
+
:rtype: ~azure.core.rest.HttpResponse
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
request_copy = deepcopy(request)
|
|
108
|
+
path_format_arguments = {
|
|
109
|
+
"endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True),
|
|
110
|
+
"subscriptionId": self._serialize.url("self._config.subscription_id", self._config.subscription_id, "str"),
|
|
111
|
+
"resourceGroupName": self._serialize.url(
|
|
112
|
+
"self._config.resource_group_name", self._config.resource_group_name, "str"
|
|
113
|
+
),
|
|
114
|
+
"workspaceName": self._serialize.url("self._config.workspace_name", self._config.workspace_name, "str"),
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
request_copy.url = self._client.format_url(request_copy.url, **path_format_arguments)
|
|
118
|
+
return self._client.send_request(request_copy, stream=stream, **kwargs) # type: ignore
|
|
119
|
+
|
|
120
|
+
def close(self) -> None:
|
|
121
|
+
self._client.close()
|
|
122
|
+
|
|
123
|
+
def __enter__(self) -> Self:
|
|
124
|
+
self._client.__enter__()
|
|
125
|
+
return self
|
|
126
|
+
|
|
127
|
+
def __exit__(self, *exc_details: Any) -> None:
|
|
128
|
+
self._client.__exit__(*exc_details)
|
|
azure/ai/evaluation/_common/raiclient/_configuration.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# --------------------------------------------------------------------------
|
|
3
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
4
|
+
# Licensed under the MIT License. See License.txt in the project root for license information.
|
|
5
|
+
# Code generated by Microsoft (R) Python Code Generator.
|
|
6
|
+
# Changes may cause incorrect behavior and will be lost if the code is regenerated.
|
|
7
|
+
# --------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
from typing import Any, TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
from azure.core.pipeline import policies
|
|
12
|
+
|
|
13
|
+
from ._version import VERSION
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from azure.core.credentials import TokenCredential
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MachineLearningServicesClientConfiguration: # pylint: disable=too-many-instance-attributes,name-too-long
|
|
20
|
+
"""Configuration for MachineLearningServicesClient.
|
|
21
|
+
|
|
22
|
+
Note that all parameters used to create this instance are saved as instance
|
|
23
|
+
attributes.
|
|
24
|
+
|
|
25
|
+
:param endpoint: Supported Azure-AI endpoints. Required.
|
|
26
|
+
:type endpoint: str
|
|
27
|
+
:param subscription_id: The ID of the target subscription. Required.
|
|
28
|
+
:type subscription_id: str
|
|
29
|
+
:param resource_group_name: The name of the Resource Group. Required.
|
|
30
|
+
:type resource_group_name: str
|
|
31
|
+
:param workspace_name: The name of the AzureML workspace or AI project. Required.
|
|
32
|
+
:type workspace_name: str
|
|
33
|
+
:param credential: Credential used to authenticate requests to the service. Required.
|
|
34
|
+
:type credential: ~azure.core.credentials.TokenCredential
|
|
35
|
+
:keyword api_version: The API version to use for this operation. Default value is
|
|
36
|
+
"2022-11-01-preview". Note that overriding this default value may result in unsupported
|
|
37
|
+
behavior.
|
|
38
|
+
:paramtype api_version: str
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
def __init__(
|
|
42
|
+
self,
|
|
43
|
+
endpoint: str,
|
|
44
|
+
subscription_id: str,
|
|
45
|
+
resource_group_name: str,
|
|
46
|
+
workspace_name: str,
|
|
47
|
+
credential: "TokenCredential",
|
|
48
|
+
**kwargs: Any
|
|
49
|
+
) -> None:
|
|
50
|
+
api_version: str = kwargs.pop("api_version", "2022-11-01-preview")
|
|
51
|
+
|
|
52
|
+
if endpoint is None:
|
|
53
|
+
raise ValueError("Parameter 'endpoint' must not be None.")
|
|
54
|
+
if subscription_id is None:
|
|
55
|
+
raise ValueError("Parameter 'subscription_id' must not be None.")
|
|
56
|
+
if resource_group_name is None:
|
|
57
|
+
raise ValueError("Parameter 'resource_group_name' must not be None.")
|
|
58
|
+
if workspace_name is None:
|
|
59
|
+
raise ValueError("Parameter 'workspace_name' must not be None.")
|
|
60
|
+
if credential is None:
|
|
61
|
+
raise ValueError("Parameter 'credential' must not be None.")
|
|
62
|
+
|
|
63
|
+
self.endpoint = endpoint
|
|
64
|
+
self.subscription_id = subscription_id
|
|
65
|
+
self.resource_group_name = resource_group_name
|
|
66
|
+
self.workspace_name = workspace_name
|
|
67
|
+
self.credential = credential
|
|
68
|
+
self.api_version = api_version
|
|
69
|
+
self.credential_scopes = kwargs.pop("credential_scopes", ["https://ml.azure.com/.default"])
|
|
70
|
+
kwargs.setdefault("sdk_moniker", "rai_client/{}".format(VERSION))
|
|
71
|
+
self.polling_interval = kwargs.get("polling_interval", 30)
|
|
72
|
+
self._configure(**kwargs)
|
|
73
|
+
|
|
74
|
+
def _configure(self, **kwargs: Any) -> None:
|
|
75
|
+
self.user_agent_policy = kwargs.get("user_agent_policy") or policies.UserAgentPolicy(**kwargs)
|
|
76
|
+
self.headers_policy = kwargs.get("headers_policy") or policies.HeadersPolicy(**kwargs)
|
|
77
|
+
self.proxy_policy = kwargs.get("proxy_policy") or policies.ProxyPolicy(**kwargs)
|
|
78
|
+
self.logging_policy = kwargs.get("logging_policy") or policies.NetworkTraceLoggingPolicy(**kwargs)
|
|
79
|
+
self.http_logging_policy = kwargs.get("http_logging_policy") or policies.HttpLoggingPolicy(**kwargs)
|
|
80
|
+
self.custom_hook_policy = kwargs.get("custom_hook_policy") or policies.CustomHookPolicy(**kwargs)
|
|
81
|
+
self.redirect_policy = kwargs.get("redirect_policy") or policies.RedirectPolicy(**kwargs)
|
|
82
|
+
self.retry_policy = kwargs.get("retry_policy") or policies.RetryPolicy(**kwargs)
|
|
83
|
+
self.authentication_policy = kwargs.get("authentication_policy")
|
|
84
|
+
if self.credential and not self.authentication_policy:
|
|
85
|
+
self.authentication_policy = policies.BearerTokenCredentialPolicy(
|
|
86
|
+
self.credential, *self.credential_scopes, **kwargs
|
|
87
|
+
)
|