azure-ai-evaluation 1.7.0__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure/ai/evaluation/__init__.py +13 -2
- azure/ai/evaluation/_aoai/__init__.py +1 -1
- azure/ai/evaluation/_aoai/aoai_grader.py +21 -11
- azure/ai/evaluation/_aoai/label_grader.py +3 -2
- azure/ai/evaluation/_aoai/score_model_grader.py +90 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +3 -2
- azure/ai/evaluation/_aoai/text_similarity_grader.py +3 -2
- azure/ai/evaluation/_azure/_envs.py +9 -10
- azure/ai/evaluation/_azure/_token_manager.py +7 -1
- azure/ai/evaluation/_common/constants.py +11 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +32 -26
- azure/ai/evaluation/_common/onedp/__init__.py +32 -32
- azure/ai/evaluation/_common/onedp/_client.py +136 -139
- azure/ai/evaluation/_common/onedp/_configuration.py +70 -73
- azure/ai/evaluation/_common/onedp/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +50 -50
- azure/ai/evaluation/_common/onedp/_version.py +9 -9
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -29
- azure/ai/evaluation/_common/onedp/aio/_client.py +138 -143
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +70 -75
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4832 -4494
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/models/__init__.py +168 -142
- azure/ai/evaluation/_common/onedp/models/_enums.py +230 -162
- azure/ai/evaluation/_common/onedp/models/_models.py +2685 -2228
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -21
- azure/ai/evaluation/_common/onedp/operations/__init__.py +37 -39
- azure/ai/evaluation/_common/onedp/operations/_operations.py +6106 -5655
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -21
- azure/ai/evaluation/_common/rai_service.py +86 -50
- azure/ai/evaluation/_common/raiclient/__init__.py +1 -1
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +14 -1
- azure/ai/evaluation/_common/utils.py +124 -3
- azure/ai/evaluation/_constants.py +2 -1
- azure/ai/evaluation/_converters/__init__.py +1 -1
- azure/ai/evaluation/_converters/_ai_services.py +9 -8
- azure/ai/evaluation/_converters/_models.py +46 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +2 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +4 -4
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +2 -2
- azure/ai/evaluation/_evaluate/_evaluate.py +64 -58
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +130 -89
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -1
- azure/ai/evaluation/_evaluate/_utils.py +24 -15
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +3 -3
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +12 -11
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +5 -5
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +15 -5
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +24 -9
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +6 -1
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +13 -13
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +7 -7
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +7 -7
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +7 -7
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +6 -6
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +1 -5
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +34 -64
- azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +4 -4
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +2 -2
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +3 -3
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +11 -7
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +30 -25
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +210 -96
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +2 -3
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +6 -6
- azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -4
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +8 -13
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +20 -25
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +4 -4
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +25 -25
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +5 -5
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -3
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +11 -14
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +43 -34
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +3 -3
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +12 -11
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +6 -6
- azure/ai/evaluation/_exceptions.py +10 -0
- azure/ai/evaluation/_http_utils.py +3 -3
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +3 -3
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +5 -2
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +5 -10
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +1 -4
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +12 -19
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +2 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +11 -5
- azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +193 -111
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +3 -1
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +94 -52
- azure/ai/evaluation/red_team/_attack_strategy.py +4 -1
- azure/ai/evaluation/red_team/_callback_chat_target.py +4 -9
- azure/ai/evaluation/red_team/_default_converter.py +1 -1
- azure/ai/evaluation/red_team/_red_team.py +1622 -765
- azure/ai/evaluation/red_team/_red_team_result.py +43 -38
- azure/ai/evaluation/red_team/_utils/__init__.py +1 -1
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +121 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +595 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +108 -0
- azure/ai/evaluation/red_team/_utils/constants.py +6 -12
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +41 -44
- azure/ai/evaluation/red_team/_utils/logging_utils.py +17 -17
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +33 -6
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +35 -25
- azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +34 -16
- azure/ai/evaluation/simulator/_conversation/__init__.py +2 -2
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +8 -8
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +5 -5
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +54 -23
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +7 -1
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +25 -15
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +19 -31
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +20 -6
- azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
- azure/ai/evaluation/simulator/_simulator.py +9 -8
- {azure_ai_evaluation-1.7.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/METADATA +24 -1
- {azure_ai_evaluation-1.7.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/RECORD +135 -123
- azure/ai/evaluation/_common/onedp/aio/_vendor.py +0 -40
- {azure_ai_evaluation-1.7.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.7.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.7.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/top_level.txt +0 -0
|
@@ -108,16 +108,13 @@ class RunSubmitter:
|
|
|
108
108
|
f"Referenced run {previous.name} has {len(previous.outputs)} outputs, "
|
|
109
109
|
f"but {len(run.inputs)} inputs are provided."
|
|
110
110
|
)
|
|
111
|
-
|
|
111
|
+
|
|
112
112
|
# load in the previous run's outputs and inputs into the list of dictionaries to allow for
|
|
113
113
|
# the previous run's outputs to be used as inputs for the current run
|
|
114
114
|
run.inputs = [
|
|
115
|
-
{
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
**run.inputs[i]
|
|
119
|
-
}
|
|
120
|
-
for i in range(len(run.inputs))]
|
|
115
|
+
{"run.outputs": previous.outputs[i], "run.inputs": previous.inputs[i], **run.inputs[i]}
|
|
116
|
+
for i in range(len(run.inputs))
|
|
117
|
+
]
|
|
121
118
|
|
|
122
119
|
self._validate_column_mapping(run.column_mapping)
|
|
123
120
|
|
|
@@ -197,9 +194,7 @@ class RunSubmitter:
|
|
|
197
194
|
@staticmethod
|
|
198
195
|
def _validate_inputs(run: Run):
|
|
199
196
|
if not run.inputs and not run.previous_run:
|
|
200
|
-
raise BatchEngineValidationError(
|
|
201
|
-
"Either data, or a previous run must be specified for the evaluation run."
|
|
202
|
-
)
|
|
197
|
+
raise BatchEngineValidationError("Either data, or a previous run must be specified for the evaluation run.")
|
|
203
198
|
|
|
204
199
|
@staticmethod
|
|
205
200
|
def _validate_column_mapping(column_mapping: Mapping[str, str]):
|
|
@@ -94,7 +94,4 @@ def is_async_callable(obj: Any) -> bool:
|
|
|
94
94
|
:return: True if the object is an async callable.
|
|
95
95
|
:rtype: bool
|
|
96
96
|
"""
|
|
97
|
-
return (
|
|
98
|
-
inspect.iscoroutinefunction(obj)
|
|
99
|
-
or inspect.iscoroutinefunction(getattr(obj, "__call__", None))
|
|
100
|
-
)
|
|
97
|
+
return inspect.iscoroutinefunction(obj) or inspect.iscoroutinefunction(getattr(obj, "__call__", None))
|
|
@@ -11,21 +11,15 @@ from azure.identity import AzureCliCredential, DefaultAzureCredential, ManagedId
|
|
|
11
11
|
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
|
|
12
12
|
from azure.ai.evaluation._azure._envs import AzureEnvironmentClient
|
|
13
13
|
|
|
14
|
+
|
|
14
15
|
class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
|
|
15
16
|
"""Asynchronous token provider for Azure services that supports non-default Azure clouds
|
|
16
17
|
(e.g. Azure China, Azure US Government, etc.)."""
|
|
17
18
|
|
|
18
|
-
def __init__(
|
|
19
|
-
self,
|
|
20
|
-
*,
|
|
21
|
-
base_url: Optional[str] = None,
|
|
22
|
-
**kwargs: Any
|
|
23
|
-
) -> None:
|
|
19
|
+
def __init__(self, *, base_url: Optional[str] = None, **kwargs: Any) -> None:
|
|
24
20
|
"""Initialize the AsyncAzureTokenProvider."""
|
|
25
21
|
self._credential: Optional[TokenCredential] = None
|
|
26
|
-
self._env_client: Optional[AzureEnvironmentClient] = AzureEnvironmentClient(
|
|
27
|
-
base_url=base_url,
|
|
28
|
-
**kwargs)
|
|
22
|
+
self._env_client: Optional[AzureEnvironmentClient] = AzureEnvironmentClient(base_url=base_url, **kwargs)
|
|
29
23
|
|
|
30
24
|
async def close(self) -> None:
|
|
31
25
|
if self._env_client:
|
|
@@ -50,14 +44,10 @@ class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
|
|
|
50
44
|
f"{self.__class__.__name__} could not determine the credential to use.",
|
|
51
45
|
target=ErrorTarget.UNKNOWN,
|
|
52
46
|
category=ErrorCategory.INVALID_VALUE,
|
|
53
|
-
blame=ErrorBlame.SYSTEM_ERROR
|
|
47
|
+
blame=ErrorBlame.SYSTEM_ERROR,
|
|
48
|
+
)
|
|
54
49
|
|
|
55
|
-
return self._credential.get_token(
|
|
56
|
-
*scopes,
|
|
57
|
-
claims=claims,
|
|
58
|
-
tenant_id=tenant_id,
|
|
59
|
-
enable_cae=enable_cae,
|
|
60
|
-
**kwargs)
|
|
50
|
+
return self._credential.get_token(*scopes, claims=claims, tenant_id=tenant_id, enable_cae=enable_cae, **kwargs)
|
|
61
51
|
|
|
62
52
|
async def __aenter__(self) -> "AsyncAzureTokenProvider":
|
|
63
53
|
self._credential = await self._initialize_async(self._env_client)
|
|
@@ -67,7 +57,7 @@ class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
|
|
|
67
57
|
self,
|
|
68
58
|
exc_type: Optional[type] = None,
|
|
69
59
|
exc_value: Optional[BaseException] = None,
|
|
70
|
-
traceback: Optional[Any] = None
|
|
60
|
+
traceback: Optional[Any] = None,
|
|
71
61
|
) -> None:
|
|
72
62
|
await self.close()
|
|
73
63
|
|
|
@@ -80,7 +70,8 @@ class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
|
|
|
80
70
|
f"{AsyncAzureTokenProvider.__name__} instance has already been closed.",
|
|
81
71
|
target=ErrorTarget.UNKNOWN,
|
|
82
72
|
category=ErrorCategory.INVALID_VALUE,
|
|
83
|
-
blame=ErrorBlame.USER_ERROR
|
|
73
|
+
blame=ErrorBlame.USER_ERROR,
|
|
74
|
+
)
|
|
84
75
|
|
|
85
76
|
cloud_name: str = await client.get_default_cloud_name_async()
|
|
86
77
|
if cloud_name != client.DEFAULT_AZURE_CLOUD_NAME:
|
|
@@ -92,7 +83,8 @@ class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
|
|
|
92
83
|
f"Failed to get metadata for cloud '{cloud_name}'.",
|
|
93
84
|
target=ErrorTarget.UNKNOWN,
|
|
94
85
|
category=ErrorCategory.INVALID_VALUE,
|
|
95
|
-
blame=ErrorBlame.USER_ERROR
|
|
86
|
+
blame=ErrorBlame.USER_ERROR,
|
|
87
|
+
)
|
|
96
88
|
|
|
97
89
|
authority = metadata.get("active_directory_endpoint")
|
|
98
90
|
return DefaultAzureCredential(authority=authority, exclude_shared_token_cache_credential=True)
|
|
@@ -100,6 +92,7 @@ class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
|
|
|
100
92
|
# using Azure on behalf of credentials requires the use of the azure-ai-ml package
|
|
101
93
|
try:
|
|
102
94
|
from azure.ai.ml.identity import AzureMLOnBehalfOfCredential
|
|
95
|
+
|
|
103
96
|
return AzureMLOnBehalfOfCredential() # type: ignore
|
|
104
97
|
except (ModuleNotFoundError, ImportError):
|
|
105
98
|
raise EvaluationException( # pylint: disable=raise-missing-from
|
|
@@ -7,8 +7,10 @@ from concurrent.futures import ThreadPoolExecutor
|
|
|
7
7
|
from functools import partial
|
|
8
8
|
from typing_extensions import override
|
|
9
9
|
|
|
10
|
+
|
|
10
11
|
class ThreadPoolExecutorWithContext(ThreadPoolExecutor):
|
|
11
12
|
"""ThreadPoolExecutor that preserves context variables across threads."""
|
|
13
|
+
|
|
12
14
|
@override
|
|
13
15
|
def submit(self, fn, *args, **kwargs):
|
|
14
16
|
context = contextvars.copy_context()
|
|
@@ -40,7 +40,7 @@ from azure.ai.evaluation._legacy.prompty._utils import (
|
|
|
40
40
|
from azure.ai.evaluation._constants import DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS
|
|
41
41
|
from azure.ai.evaluation._legacy._common._logging import get_logger
|
|
42
42
|
from azure.ai.evaluation._legacy._common._async_token_provider import AsyncAzureTokenProvider
|
|
43
|
-
|
|
43
|
+
from azure.ai.evaluation._user_agent import UserAgentSingleton
|
|
44
44
|
|
|
45
45
|
PROMPTY_EXTENSION: Final[str] = ".prompty"
|
|
46
46
|
|
|
@@ -168,8 +168,9 @@ class AsyncPrompty:
|
|
|
168
168
|
self._outputs: Dict[str, Any] = configs.get("outputs", {})
|
|
169
169
|
self._name: str = configs.get("name", path.stem)
|
|
170
170
|
self._logger = logger or get_logger(__name__)
|
|
171
|
-
self._token_credential: Union[TokenCredential, AsyncTokenCredential] =
|
|
171
|
+
self._token_credential: Union[TokenCredential, AsyncTokenCredential] = (
|
|
172
172
|
token_credential or AsyncAzureTokenProvider()
|
|
173
|
+
)
|
|
173
174
|
|
|
174
175
|
@property
|
|
175
176
|
def path(self) -> Path:
|
|
@@ -290,6 +291,8 @@ class AsyncPrompty:
|
|
|
290
291
|
# for better debugging and real-time status updates.
|
|
291
292
|
max_retries = 0
|
|
292
293
|
|
|
294
|
+
default_headers = {"User-Agent": UserAgentSingleton().value}
|
|
295
|
+
|
|
293
296
|
api_client: Union[AsyncAzureOpenAI, AsyncOpenAI]
|
|
294
297
|
if isinstance(connection, AzureOpenAIConnection):
|
|
295
298
|
api_client = AsyncAzureOpenAI(
|
|
@@ -298,9 +301,10 @@ class AsyncPrompty:
|
|
|
298
301
|
azure_deployment=connection.azure_deployment,
|
|
299
302
|
api_version=connection.api_version,
|
|
300
303
|
max_retries=max_retries,
|
|
301
|
-
azure_ad_token_provider=(
|
|
302
|
-
if not connection.api_key
|
|
303
|
-
|
|
304
|
+
azure_ad_token_provider=(
|
|
305
|
+
self.get_token_provider(self._token_credential) if not connection.api_key else None
|
|
306
|
+
),
|
|
307
|
+
default_headers=default_headers,
|
|
304
308
|
)
|
|
305
309
|
elif isinstance(connection, OpenAIConnection):
|
|
306
310
|
api_client = AsyncOpenAI(
|
|
@@ -308,6 +312,7 @@ class AsyncPrompty:
|
|
|
308
312
|
api_key=connection.api_key,
|
|
309
313
|
organization=connection.organization,
|
|
310
314
|
max_retries=max_retries,
|
|
315
|
+
default_headers=default_headers,
|
|
311
316
|
)
|
|
312
317
|
else:
|
|
313
318
|
raise NotSupportedError(
|
|
@@ -414,6 +419,7 @@ class AsyncPrompty:
|
|
|
414
419
|
:return: The token provider if a credential is provided, otherwise None.
|
|
415
420
|
:rtype: Optional[AsyncAzureADTokenProvider]
|
|
416
421
|
"""
|
|
422
|
+
|
|
417
423
|
async def _wrapper() -> str:
|
|
418
424
|
token = cred.get_token(TokenScope.COGNITIVE_SERVICES_MANAGEMENT)
|
|
419
425
|
if isinstance(token, Awaitable):
|