azure-ai-evaluation 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. azure/ai/evaluation/__init__.py +13 -2
  2. azure/ai/evaluation/_aoai/__init__.py +1 -1
  3. azure/ai/evaluation/_aoai/aoai_grader.py +21 -11
  4. azure/ai/evaluation/_aoai/label_grader.py +3 -2
  5. azure/ai/evaluation/_aoai/score_model_grader.py +90 -0
  6. azure/ai/evaluation/_aoai/string_check_grader.py +3 -2
  7. azure/ai/evaluation/_aoai/text_similarity_grader.py +3 -2
  8. azure/ai/evaluation/_azure/_envs.py +9 -10
  9. azure/ai/evaluation/_azure/_token_manager.py +7 -1
  10. azure/ai/evaluation/_common/constants.py +11 -2
  11. azure/ai/evaluation/_common/evaluation_onedp_client.py +32 -26
  12. azure/ai/evaluation/_common/onedp/__init__.py +32 -32
  13. azure/ai/evaluation/_common/onedp/_client.py +136 -139
  14. azure/ai/evaluation/_common/onedp/_configuration.py +70 -73
  15. azure/ai/evaluation/_common/onedp/_patch.py +21 -21
  16. azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
  17. azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
  18. azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
  19. azure/ai/evaluation/_common/onedp/_validation.py +50 -50
  20. azure/ai/evaluation/_common/onedp/_version.py +9 -9
  21. azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -29
  22. azure/ai/evaluation/_common/onedp/aio/_client.py +138 -143
  23. azure/ai/evaluation/_common/onedp/aio/_configuration.py +70 -75
  24. azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -21
  25. azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +37 -39
  26. azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +4832 -4494
  27. azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -21
  28. azure/ai/evaluation/_common/onedp/models/__init__.py +168 -142
  29. azure/ai/evaluation/_common/onedp/models/_enums.py +230 -162
  30. azure/ai/evaluation/_common/onedp/models/_models.py +2685 -2228
  31. azure/ai/evaluation/_common/onedp/models/_patch.py +21 -21
  32. azure/ai/evaluation/_common/onedp/operations/__init__.py +37 -39
  33. azure/ai/evaluation/_common/onedp/operations/_operations.py +6106 -5657
  34. azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -21
  35. azure/ai/evaluation/_common/rai_service.py +86 -50
  36. azure/ai/evaluation/_common/raiclient/__init__.py +1 -1
  37. azure/ai/evaluation/_common/raiclient/operations/_operations.py +14 -1
  38. azure/ai/evaluation/_common/utils.py +124 -3
  39. azure/ai/evaluation/_constants.py +2 -1
  40. azure/ai/evaluation/_converters/__init__.py +1 -1
  41. azure/ai/evaluation/_converters/_ai_services.py +9 -8
  42. azure/ai/evaluation/_converters/_models.py +46 -0
  43. azure/ai/evaluation/_converters/_sk_services.py +495 -0
  44. azure/ai/evaluation/_eval_mapping.py +2 -2
  45. azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +4 -4
  46. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +2 -2
  47. azure/ai/evaluation/_evaluate/_evaluate.py +60 -54
  48. azure/ai/evaluation/_evaluate/_evaluate_aoai.py +130 -89
  49. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +0 -1
  50. azure/ai/evaluation/_evaluate/_utils.py +24 -15
  51. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +3 -3
  52. azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +12 -11
  53. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +5 -5
  54. azure/ai/evaluation/_evaluators/_common/_base_eval.py +15 -5
  55. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +24 -9
  56. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +6 -1
  57. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +13 -13
  58. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +7 -7
  59. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +7 -7
  60. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +7 -7
  61. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +6 -6
  62. azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +1 -5
  63. azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +34 -64
  64. azure/ai/evaluation/_evaluators/_eci/_eci.py +3 -3
  65. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +4 -4
  66. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +2 -2
  67. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +3 -3
  68. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +11 -7
  69. azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +30 -25
  70. azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +210 -96
  71. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +2 -3
  72. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +6 -6
  73. azure/ai/evaluation/_evaluators/_qa/_qa.py +4 -4
  74. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +8 -13
  75. azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +20 -25
  76. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +4 -4
  77. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +21 -21
  78. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +5 -5
  79. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +3 -3
  80. azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +11 -14
  81. azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +43 -34
  82. azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +3 -3
  83. azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +12 -11
  84. azure/ai/evaluation/_evaluators/_xpia/xpia.py +6 -6
  85. azure/ai/evaluation/_exceptions.py +10 -0
  86. azure/ai/evaluation/_http_utils.py +3 -3
  87. azure/ai/evaluation/_legacy/_batch_engine/_engine.py +3 -3
  88. azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +5 -2
  89. azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +5 -10
  90. azure/ai/evaluation/_legacy/_batch_engine/_utils.py +1 -4
  91. azure/ai/evaluation/_legacy/_common/_async_token_provider.py +12 -19
  92. azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +2 -0
  93. azure/ai/evaluation/_legacy/prompty/_prompty.py +11 -5
  94. azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
  95. azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +193 -111
  96. azure/ai/evaluation/_user_agent.py +32 -1
  97. azure/ai/evaluation/_version.py +1 -1
  98. azure/ai/evaluation/red_team/__init__.py +3 -1
  99. azure/ai/evaluation/red_team/_agent/__init__.py +1 -1
  100. azure/ai/evaluation/red_team/_agent/_agent_functions.py +68 -71
  101. azure/ai/evaluation/red_team/_agent/_agent_tools.py +103 -145
  102. azure/ai/evaluation/red_team/_agent/_agent_utils.py +26 -6
  103. azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +62 -71
  104. azure/ai/evaluation/red_team/_attack_objective_generator.py +94 -52
  105. azure/ai/evaluation/red_team/_attack_strategy.py +2 -1
  106. azure/ai/evaluation/red_team/_callback_chat_target.py +4 -9
  107. azure/ai/evaluation/red_team/_default_converter.py +1 -1
  108. azure/ai/evaluation/red_team/_red_team.py +1286 -739
  109. azure/ai/evaluation/red_team/_red_team_result.py +43 -38
  110. azure/ai/evaluation/red_team/_utils/__init__.py +1 -1
  111. azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +32 -32
  112. azure/ai/evaluation/red_team/_utils/_rai_service_target.py +163 -138
  113. azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +14 -14
  114. azure/ai/evaluation/red_team/_utils/constants.py +2 -12
  115. azure/ai/evaluation/red_team/_utils/formatting_utils.py +41 -44
  116. azure/ai/evaluation/red_team/_utils/logging_utils.py +17 -17
  117. azure/ai/evaluation/red_team/_utils/metric_mapping.py +31 -4
  118. azure/ai/evaluation/red_team/_utils/strategy_utils.py +33 -25
  119. azure/ai/evaluation/simulator/_adversarial_scenario.py +2 -0
  120. azure/ai/evaluation/simulator/_adversarial_simulator.py +26 -15
  121. azure/ai/evaluation/simulator/_conversation/__init__.py +2 -2
  122. azure/ai/evaluation/simulator/_direct_attack_simulator.py +8 -8
  123. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +5 -5
  124. azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +54 -24
  125. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +7 -1
  126. azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +10 -8
  127. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +19 -31
  128. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +20 -6
  129. azure/ai/evaluation/simulator/_model_tools/models.py +1 -1
  130. azure/ai/evaluation/simulator/_simulator.py +9 -8
  131. {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/METADATA +15 -1
  132. {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/RECORD +135 -131
  133. azure/ai/evaluation/_common/onedp/aio/_vendor.py +0 -40
  134. {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/NOTICE.txt +0 -0
  135. {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/WHEEL +0 -0
  136. {azure_ai_evaluation-1.8.0.dist-info → azure_ai_evaluation-1.9.0.dist-info}/top_level.txt +0 -0
@@ -11,21 +11,15 @@ from azure.identity import AzureCliCredential, DefaultAzureCredential, ManagedId
11
11
  from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
12
12
  from azure.ai.evaluation._azure._envs import AzureEnvironmentClient
13
13
 
14
+
14
15
  class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
15
16
  """Asynchronous token provider for Azure services that supports non-default Azure clouds
16
17
  (e.g. Azure China, Azure US Government, etc.)."""
17
18
 
18
- def __init__(
19
- self,
20
- *,
21
- base_url: Optional[str] = None,
22
- **kwargs: Any
23
- ) -> None:
19
+ def __init__(self, *, base_url: Optional[str] = None, **kwargs: Any) -> None:
24
20
  """Initialize the AsyncAzureTokenProvider."""
25
21
  self._credential: Optional[TokenCredential] = None
26
- self._env_client: Optional[AzureEnvironmentClient] = AzureEnvironmentClient(
27
- base_url=base_url,
28
- **kwargs)
22
+ self._env_client: Optional[AzureEnvironmentClient] = AzureEnvironmentClient(base_url=base_url, **kwargs)
29
23
 
30
24
  async def close(self) -> None:
31
25
  if self._env_client:
@@ -50,14 +44,10 @@ class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
50
44
  f"{self.__class__.__name__} could not determine the credential to use.",
51
45
  target=ErrorTarget.UNKNOWN,
52
46
  category=ErrorCategory.INVALID_VALUE,
53
- blame=ErrorBlame.SYSTEM_ERROR)
47
+ blame=ErrorBlame.SYSTEM_ERROR,
48
+ )
54
49
 
55
- return self._credential.get_token(
56
- *scopes,
57
- claims=claims,
58
- tenant_id=tenant_id,
59
- enable_cae=enable_cae,
60
- **kwargs)
50
+ return self._credential.get_token(*scopes, claims=claims, tenant_id=tenant_id, enable_cae=enable_cae, **kwargs)
61
51
 
62
52
  async def __aenter__(self) -> "AsyncAzureTokenProvider":
63
53
  self._credential = await self._initialize_async(self._env_client)
@@ -67,7 +57,7 @@ class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
67
57
  self,
68
58
  exc_type: Optional[type] = None,
69
59
  exc_value: Optional[BaseException] = None,
70
- traceback: Optional[Any] = None
60
+ traceback: Optional[Any] = None,
71
61
  ) -> None:
72
62
  await self.close()
73
63
 
@@ -80,7 +70,8 @@ class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
80
70
  f"{AsyncAzureTokenProvider.__name__} instance has already been closed.",
81
71
  target=ErrorTarget.UNKNOWN,
82
72
  category=ErrorCategory.INVALID_VALUE,
83
- blame=ErrorBlame.USER_ERROR)
73
+ blame=ErrorBlame.USER_ERROR,
74
+ )
84
75
 
85
76
  cloud_name: str = await client.get_default_cloud_name_async()
86
77
  if cloud_name != client.DEFAULT_AZURE_CLOUD_NAME:
@@ -92,7 +83,8 @@ class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
92
83
  f"Failed to get metadata for cloud '{cloud_name}'.",
93
84
  target=ErrorTarget.UNKNOWN,
94
85
  category=ErrorCategory.INVALID_VALUE,
95
- blame=ErrorBlame.USER_ERROR)
86
+ blame=ErrorBlame.USER_ERROR,
87
+ )
96
88
 
97
89
  authority = metadata.get("active_directory_endpoint")
98
90
  return DefaultAzureCredential(authority=authority, exclude_shared_token_cache_credential=True)
@@ -100,6 +92,7 @@ class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
100
92
  # using Azure on behalf of credentials requires the use of the azure-ai-ml package
101
93
  try:
102
94
  from azure.ai.ml.identity import AzureMLOnBehalfOfCredential
95
+
103
96
  return AzureMLOnBehalfOfCredential() # type: ignore
104
97
  except (ModuleNotFoundError, ImportError):
105
98
  raise EvaluationException( # pylint: disable=raise-missing-from
@@ -7,8 +7,10 @@ from concurrent.futures import ThreadPoolExecutor
7
7
  from functools import partial
8
8
  from typing_extensions import override
9
9
 
10
+
10
11
  class ThreadPoolExecutorWithContext(ThreadPoolExecutor):
11
12
  """ThreadPoolExecutor that preserves context variables across threads."""
13
+
12
14
  @override
13
15
  def submit(self, fn, *args, **kwargs):
14
16
  context = contextvars.copy_context()
@@ -40,7 +40,7 @@ from azure.ai.evaluation._legacy.prompty._utils import (
40
40
  from azure.ai.evaluation._constants import DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS
41
41
  from azure.ai.evaluation._legacy._common._logging import get_logger
42
42
  from azure.ai.evaluation._legacy._common._async_token_provider import AsyncAzureTokenProvider
43
-
43
+ from azure.ai.evaluation._user_agent import UserAgentSingleton
44
44
 
45
45
  PROMPTY_EXTENSION: Final[str] = ".prompty"
46
46
 
@@ -168,8 +168,9 @@ class AsyncPrompty:
168
168
  self._outputs: Dict[str, Any] = configs.get("outputs", {})
169
169
  self._name: str = configs.get("name", path.stem)
170
170
  self._logger = logger or get_logger(__name__)
171
- self._token_credential: Union[TokenCredential, AsyncTokenCredential] = \
171
+ self._token_credential: Union[TokenCredential, AsyncTokenCredential] = (
172
172
  token_credential or AsyncAzureTokenProvider()
173
+ )
173
174
 
174
175
  @property
175
176
  def path(self) -> Path:
@@ -290,6 +291,8 @@ class AsyncPrompty:
290
291
  # for better debugging and real-time status updates.
291
292
  max_retries = 0
292
293
 
294
+ default_headers = {"User-Agent": UserAgentSingleton().value}
295
+
293
296
  api_client: Union[AsyncAzureOpenAI, AsyncOpenAI]
294
297
  if isinstance(connection, AzureOpenAIConnection):
295
298
  api_client = AsyncAzureOpenAI(
@@ -298,9 +301,10 @@ class AsyncPrompty:
298
301
  azure_deployment=connection.azure_deployment,
299
302
  api_version=connection.api_version,
300
303
  max_retries=max_retries,
301
- azure_ad_token_provider=(self.get_token_provider(self._token_credential)
302
- if not connection.api_key
303
- else None),
304
+ azure_ad_token_provider=(
305
+ self.get_token_provider(self._token_credential) if not connection.api_key else None
306
+ ),
307
+ default_headers=default_headers,
304
308
  )
305
309
  elif isinstance(connection, OpenAIConnection):
306
310
  api_client = AsyncOpenAI(
@@ -308,6 +312,7 @@ class AsyncPrompty:
308
312
  api_key=connection.api_key,
309
313
  organization=connection.organization,
310
314
  max_retries=max_retries,
315
+ default_headers=default_headers,
311
316
  )
312
317
  else:
313
318
  raise NotSupportedError(
@@ -414,6 +419,7 @@ class AsyncPrompty:
414
419
  :return: The token provider if a credential is provided, otherwise None.
415
420
  :rtype: Optional[AsyncAzureADTokenProvider]
416
421
  """
422
+
417
423
  async def _wrapper() -> str:
418
424
  token = cred.get_token(TokenScope.COGNITIVE_SERVICES_MANAGEMENT)
419
425
  if isinstance(token, Awaitable):
@@ -1,3 +1,3 @@
1
1
  # ---------------------------------------------------------
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
- # ---------------------------------------------------------
3
+ # ---------------------------------------------------------