azure-ai-evaluation 1.10.0__py3-none-any.whl → 1.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/_common/onedp/models/_models.py +5 -0
- azure/ai/evaluation/_converters/_ai_services.py +60 -10
- azure/ai/evaluation/_converters/_models.py +75 -26
- azure/ai/evaluation/_evaluate/_eval_run.py +14 -1
- azure/ai/evaluation/_evaluate/_evaluate.py +13 -4
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +104 -35
- azure/ai/evaluation/_evaluate/_utils.py +4 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +2 -1
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +113 -19
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +7 -2
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +1 -1
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +2 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +113 -3
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +8 -2
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +2 -1
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +10 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +2 -1
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +2 -1
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +8 -2
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +104 -60
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +58 -41
- azure/ai/evaluation/_exceptions.py +1 -0
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +2 -1
- azure/ai/evaluation/red_team/_attack_objective_generator.py +17 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +14 -1
- azure/ai/evaluation/red_team/_evaluation_processor.py +376 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +322 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +661 -0
- azure/ai/evaluation/red_team/_red_team.py +697 -3067
- azure/ai/evaluation/red_team/_result_processor.py +610 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +34 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +3 -1
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +6 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +115 -13
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +24 -4
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +17 -4
- azure/ai/evaluation/simulator/_adversarial_simulator.py +9 -0
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +19 -5
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +4 -3
- {azure_ai_evaluation-1.10.0.dist-info → azure_ai_evaluation-1.11.1.dist-info}/METADATA +39 -3
- {azure_ai_evaluation-1.10.0.dist-info → azure_ai_evaluation-1.11.1.dist-info}/RECORD +49 -41
- {azure_ai_evaluation-1.10.0.dist-info → azure_ai_evaluation-1.11.1.dist-info}/WHEEL +1 -1
- {azure_ai_evaluation-1.10.0.dist-info → azure_ai_evaluation-1.11.1.dist-info/licenses}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.10.0.dist-info → azure_ai_evaluation-1.11.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
"""
|
|
5
|
+
Retry utilities for Red Team Agent.
|
|
6
|
+
|
|
7
|
+
This module provides centralized retry logic and decorators for handling
|
|
8
|
+
network errors and other transient failures consistently across the codebase.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import logging
|
|
13
|
+
from typing import Any, Callable, Dict, List, Optional, TypeVar
|
|
14
|
+
from tenacity import (
|
|
15
|
+
retry,
|
|
16
|
+
stop_after_attempt,
|
|
17
|
+
wait_exponential,
|
|
18
|
+
retry_if_exception,
|
|
19
|
+
RetryError,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Retry imports for exception handling
|
|
23
|
+
import httpx
|
|
24
|
+
import httpcore
|
|
25
|
+
|
|
26
|
+
# Import Azure exceptions if available
|
|
27
|
+
try:
|
|
28
|
+
from azure.core.exceptions import ServiceRequestError, ServiceResponseError
|
|
29
|
+
|
|
30
|
+
AZURE_EXCEPTIONS = (ServiceRequestError, ServiceResponseError)
|
|
31
|
+
except ImportError:
|
|
32
|
+
AZURE_EXCEPTIONS = ()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Type variable for generic retry decorators
|
|
36
|
+
T = TypeVar("T")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class RetryManager:
    """Centralized retry management for Red Team operations.

    Bundles a retry policy (attempt limit, exponential backoff bounds, and the
    set of exception types considered transient) together with logging hooks,
    and builds ``tenacity`` decorators / configuration dictionaries from it.
    """

    # Default retry configuration
    DEFAULT_MAX_ATTEMPTS = 5
    DEFAULT_MIN_WAIT = 2
    DEFAULT_MAX_WAIT = 30
    DEFAULT_MULTIPLIER = 1.5

    # Network-related exceptions that should trigger retries.
    # Several entries overlap (for example ``httpx.HTTPError`` is the base class
    # of ``httpx.HTTPStatusError``, and ``IOError`` is an alias of ``OSError``);
    # the overlap is harmless for ``isinstance`` checks.
    NETWORK_EXCEPTIONS = (
        httpx.ConnectTimeout,
        httpx.ReadTimeout,
        httpx.ConnectError,
        httpx.HTTPError,
        httpx.TimeoutException,
        httpx.HTTPStatusError,
        httpcore.ReadTimeout,
        ConnectionError,
        ConnectionRefusedError,
        ConnectionResetError,
        TimeoutError,
        OSError,
        IOError,
        asyncio.TimeoutError,
    ) + AZURE_EXCEPTIONS

    def __init__(
        self,
        logger: Optional[logging.Logger] = None,
        max_attempts: int = DEFAULT_MAX_ATTEMPTS,
        min_wait: int = DEFAULT_MIN_WAIT,
        max_wait: int = DEFAULT_MAX_WAIT,
        multiplier: float = DEFAULT_MULTIPLIER,
    ):
        """Initialize retry manager.

        :param logger: Logger instance for retry messages; defaults to this module's logger
        :param max_attempts: Maximum number of retry attempts
        :param min_wait: Minimum wait time between retries (seconds)
        :param max_wait: Maximum wait time between retries (seconds)
        :param multiplier: Exponential backoff multiplier
        """
        self.logger = logger or logging.getLogger(__name__)
        self.max_attempts = max_attempts
        self.min_wait = min_wait
        self.max_wait = max_wait
        self.multiplier = multiplier

    def should_retry_exception(self, exception: Exception) -> bool:
        """Determine if an exception should trigger a retry.

        :param exception: The exception to check
        :return: True if the exception is an instance of any type in ``NETWORK_EXCEPTIONS``
        """
        # ``httpx.HTTPStatusError`` (and its base ``httpx.HTTPError``) are listed in
        # NETWORK_EXCEPTIONS, so every HTTP status error is retried regardless of
        # its status code. A separate status-code filter placed after this check
        # could never run, so none is kept here.
        # NOTE(review): if only specific status codes should be retried, that
        # filter has to run *before* this isinstance check - confirm the intent.
        return isinstance(exception, self.NETWORK_EXCEPTIONS)

    def _log_attempt(self, retry_state, prefix: str = "") -> None:
        """Emit a warning describing a failed attempt that is about to be retried.

        :param retry_state: The retry state object from tenacity
        :param prefix: Text placed in front of the log message
        """
        exception = retry_state.outcome.exception()
        if exception:
            self.logger.warning(
                f"{prefix}Retry attempt {retry_state.attempt_number}/{self.max_attempts}: "
                f"{exception.__class__.__name__} - {str(exception)}. "
                f"Retrying in {retry_state.next_action.sleep} seconds..."
            )

    def _log_final_error(self, retry_state, prefix: str = "") -> Exception:
        """Emit an error once the attempts are exhausted and hand back the last exception.

        :param retry_state: The retry state object from tenacity
        :param prefix: Text placed in front of the log message
        :return: The exception raised by the final attempt
        """
        exception = retry_state.outcome.exception()
        self.logger.error(
            f"{prefix}All retries failed after {retry_state.attempt_number} attempts. "
            f"Final error: {exception.__class__.__name__}: {str(exception)}"
        )
        return exception

    def log_retry_attempt(self, retry_state) -> None:
        """Log retry attempts for visibility.

        :param retry_state: The retry state object from tenacity
        """
        self._log_attempt(retry_state)

    def log_retry_error(self, retry_state) -> Exception:
        """Log the final error after all retries failed.

        :param retry_state: The retry state object from tenacity
        :return: The final exception
        """
        return self._log_final_error(retry_state)

    def create_retry_decorator(self, context: str = "") -> Callable:
        """Create a retry decorator with the configured settings.

        The final exception is *returned* by the ``retry_error_callback`` rather
        than raised; with tenacity this makes the decorated call return the
        exception object once the attempts are exhausted, so callers should
        check the result type.

        :param context: Optional context string for logging
        :return: Configured retry decorator
        """
        context_prefix = f"[{context}] " if context else ""

        def log_attempt(retry_state):
            self._log_attempt(retry_state, context_prefix)

        def log_final_error(retry_state):
            return self._log_final_error(retry_state, context_prefix)

        return retry(
            retry=retry_if_exception(self.should_retry_exception),
            stop=stop_after_attempt(self.max_attempts),
            wait=wait_exponential(
                multiplier=self.multiplier,
                min=self.min_wait,
                max=self.max_wait,
            ),
            before_sleep=log_attempt,
            retry_error_callback=log_final_error,
        )

    def get_retry_config(self) -> Dict[str, Any]:
        """Get retry configuration dictionary for backward compatibility.

        :return: Dictionary with a single ``"network_retry"`` entry holding the
            keyword arguments (``retry``, ``stop``, ``wait``,
            ``retry_error_callback``, ``before_sleep``) for a tenacity retry
        """
        return {
            "network_retry": {
                "retry": retry_if_exception(self.should_retry_exception),
                "stop": stop_after_attempt(self.max_attempts),
                "wait": wait_exponential(
                    multiplier=self.multiplier,
                    min=self.min_wait,
                    max=self.max_wait,
                ),
                "retry_error_callback": self.log_retry_error,
                "before_sleep": self.log_retry_attempt,
            }
        }
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def create_standard_retry_manager(logger: Optional[logging.Logger] = None) -> RetryManager:
    """Build a ``RetryManager`` that uses the class defaults for every setting.

    :param logger: Optional logger instance passed through to the manager
    :return: Configured RetryManager instance
    """
    manager = RetryManager(logger=logger)
    return manager
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# Convenience function for creating retry decorators
|
|
196
|
+
def create_retry_decorator(
    logger: Optional[logging.Logger] = None,
    context: str = "",
    max_attempts: int = RetryManager.DEFAULT_MAX_ATTEMPTS,
    min_wait: int = RetryManager.DEFAULT_MIN_WAIT,
    max_wait: int = RetryManager.DEFAULT_MAX_WAIT,
    multiplier: float = RetryManager.DEFAULT_MULTIPLIER,
) -> Callable:
    """Create a retry decorator with specified parameters.

    Convenience wrapper that builds a one-off ``RetryManager`` from the given
    settings and returns the decorator produced by its
    ``create_retry_decorator`` method.

    :param logger: Optional logger instance
    :param context: Optional context for logging
    :param max_attempts: Maximum retry attempts
    :param min_wait: Minimum wait time between retries
    :param max_wait: Maximum wait time between retries
    :param multiplier: Exponential backoff multiplier; defaults to
        ``RetryManager.DEFAULT_MULTIPLIER``, so existing callers are unaffected
    :return: Configured retry decorator
    """
    retry_manager = RetryManager(
        logger=logger,
        max_attempts=max_attempts,
        min_wait=min_wait,
        max_wait=max_wait,
        multiplier=multiplier,
    )
    return retry_manager.create_retry_decorator(context)
|
|
@@ -88,12 +88,15 @@ def get_converter_for_strategy(
|
|
|
88
88
|
|
|
89
89
|
|
|
90
90
|
def get_chat_target(
|
|
91
|
-
target: Union[PromptChatTarget, Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration]
|
|
91
|
+
target: Union[PromptChatTarget, Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration],
|
|
92
|
+
prompt_to_context: Optional[Dict[str, str]] = None,
|
|
92
93
|
) -> PromptChatTarget:
|
|
93
94
|
"""Convert various target types to a PromptChatTarget.
|
|
94
95
|
|
|
95
96
|
:param target: The target to convert
|
|
96
97
|
:type target: Union[PromptChatTarget, Callable, AzureOpenAIModelConfiguration, OpenAIModelConfiguration]
|
|
98
|
+
:param prompt_to_context: Optional mapping from prompt content to context
|
|
99
|
+
:type prompt_to_context: Optional[Dict[str, str]]
|
|
97
100
|
:return: A PromptChatTarget instance
|
|
98
101
|
:rtype: PromptChatTarget
|
|
99
102
|
"""
|
|
@@ -151,7 +154,7 @@ def get_chat_target(
|
|
|
151
154
|
has_callback_signature = False
|
|
152
155
|
|
|
153
156
|
if has_callback_signature:
|
|
154
|
-
chat_target = _CallbackChatTarget(callback=target)
|
|
157
|
+
chat_target = _CallbackChatTarget(callback=target, prompt_to_context=prompt_to_context)
|
|
155
158
|
else:
|
|
156
159
|
|
|
157
160
|
async def callback_target(
|
|
@@ -163,8 +166,18 @@ def get_chat_target(
|
|
|
163
166
|
messages_list = [_message_to_dict(chat_message) for chat_message in messages] # type: ignore
|
|
164
167
|
latest_message = messages_list[-1]
|
|
165
168
|
application_input = latest_message["content"]
|
|
169
|
+
|
|
170
|
+
# Check if target accepts context as a parameter
|
|
171
|
+
sig = inspect.signature(target)
|
|
172
|
+
param_names = list(sig.parameters.keys())
|
|
173
|
+
|
|
166
174
|
try:
|
|
167
|
-
|
|
175
|
+
if "context" in param_names:
|
|
176
|
+
# Pass context if the target function accepts it
|
|
177
|
+
response = target(query=application_input, context=context)
|
|
178
|
+
else:
|
|
179
|
+
# Fallback to original behavior for compatibility
|
|
180
|
+
response = target(query=application_input)
|
|
168
181
|
except Exception as e:
|
|
169
182
|
response = f"Something went wrong {e!s}"
|
|
170
183
|
|
|
@@ -177,7 +190,7 @@ def get_chat_target(
|
|
|
177
190
|
messages_list.append(formatted_response) # type: ignore
|
|
178
191
|
return {"messages": messages_list, "stream": stream, "session_state": session_state, "context": {}}
|
|
179
192
|
|
|
180
|
-
chat_target = _CallbackChatTarget(callback=callback_target) # type: ignore
|
|
193
|
+
chat_target = _CallbackChatTarget(callback=callback_target, prompt_to_context=prompt_to_context) # type: ignore
|
|
181
194
|
|
|
182
195
|
return chat_target
|
|
183
196
|
|
|
@@ -8,6 +8,7 @@ import logging
|
|
|
8
8
|
import random
|
|
9
9
|
from typing import Any, Callable, Dict, List, Optional, Union, cast
|
|
10
10
|
import uuid
|
|
11
|
+
import warnings
|
|
11
12
|
|
|
12
13
|
from tqdm import tqdm
|
|
13
14
|
|
|
@@ -68,6 +69,14 @@ class AdversarialSimulator:
|
|
|
68
69
|
|
|
69
70
|
def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
|
|
70
71
|
"""Constructor."""
|
|
72
|
+
warnings.warn(
|
|
73
|
+
"DEPRECATION NOTE: Azure AI Evaluation SDK has discontinued active development on the AdversarialSimulator class."
|
|
74
|
+
+ " While existing functionality remains available in preview, it is no longer recommended for production workloads or future integration. "
|
|
75
|
+
+ "We recommend users migrate to the AI Red Teaming Agent for future use as it supports full parity of functionality."
|
|
76
|
+
+ " See https://aka.ms/airedteamingagent-sample for details on AI Red Teaming Agent.",
|
|
77
|
+
DeprecationWarning,
|
|
78
|
+
stacklevel=2,
|
|
79
|
+
)
|
|
71
80
|
|
|
72
81
|
if is_onedp_project(azure_ai_project):
|
|
73
82
|
self.azure_ai_project = azure_ai_project
|
|
@@ -30,7 +30,11 @@ class GeneratedRAIClient:
|
|
|
30
30
|
:type token_manager: ~azure.ai.evaluation.simulator._model_tools._identity_manager.APITokenManager
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
|
-
def __init__(
|
|
33
|
+
def __init__(
|
|
34
|
+
self,
|
|
35
|
+
azure_ai_project: Union[AzureAIProject, str],
|
|
36
|
+
token_manager: ManagedIdentityAPITokenManager,
|
|
37
|
+
):
|
|
34
38
|
self.azure_ai_project = azure_ai_project
|
|
35
39
|
self.token_manager = token_manager
|
|
36
40
|
|
|
@@ -53,10 +57,14 @@ class GeneratedRAIClient:
|
|
|
53
57
|
).rai_svc
|
|
54
58
|
else:
|
|
55
59
|
self._client = AIProjectClient(
|
|
56
|
-
endpoint=azure_ai_project,
|
|
60
|
+
endpoint=azure_ai_project,
|
|
61
|
+
credential=token_manager,
|
|
62
|
+
user_agent_policy=user_agent_policy,
|
|
57
63
|
).red_teams
|
|
58
64
|
self._evaluation_onedp_client = EvaluationServiceOneDPClient(
|
|
59
|
-
endpoint=azure_ai_project,
|
|
65
|
+
endpoint=azure_ai_project,
|
|
66
|
+
credential=token_manager,
|
|
67
|
+
user_agent_policy=user_agent_policy,
|
|
60
68
|
)
|
|
61
69
|
|
|
62
70
|
def _get_service_discovery_url(self):
|
|
@@ -68,7 +76,10 @@ class GeneratedRAIClient:
|
|
|
68
76
|
import requests
|
|
69
77
|
|
|
70
78
|
bearer_token = self._fetch_or_reuse_token(self.token_manager)
|
|
71
|
-
headers = {
|
|
79
|
+
headers = {
|
|
80
|
+
"Authorization": f"Bearer {bearer_token}",
|
|
81
|
+
"Content-Type": "application/json",
|
|
82
|
+
}
|
|
72
83
|
|
|
73
84
|
response = requests.get(
|
|
74
85
|
f"https://management.azure.com/subscriptions/{self.azure_ai_project['subscription_id']}/"
|
|
@@ -100,6 +111,7 @@ class GeneratedRAIClient:
|
|
|
100
111
|
risk_category: Optional[str] = None,
|
|
101
112
|
application_scenario: str = None,
|
|
102
113
|
strategy: Optional[str] = None,
|
|
114
|
+
language: str = "en",
|
|
103
115
|
scan_session_id: Optional[str] = None,
|
|
104
116
|
) -> Dict:
|
|
105
117
|
"""Get attack objectives using the auto-generated operations.
|
|
@@ -112,6 +124,8 @@ class GeneratedRAIClient:
|
|
|
112
124
|
:type application_scenario: str
|
|
113
125
|
:param strategy: Optional strategy to filter the attack objectives
|
|
114
126
|
:type strategy: Optional[str]
|
|
127
|
+
:param language: Language code for the attack objectives (e.g., "en", "es", "fr")
|
|
128
|
+
:type language: str
|
|
115
129
|
:param scan_session_id: Optional unique session ID for the scan
|
|
116
130
|
:type scan_session_id: Optional[str]
|
|
117
131
|
:return: The attack objectives
|
|
@@ -122,7 +136,7 @@ class GeneratedRAIClient:
|
|
|
122
136
|
response = self._client.get_attack_objectives(
|
|
123
137
|
risk_types=[risk_type],
|
|
124
138
|
risk_category=risk_category,
|
|
125
|
-
lang=
|
|
139
|
+
lang=language,
|
|
126
140
|
strategy=strategy,
|
|
127
141
|
headers={"x-ms-client-request-id": scan_session_id},
|
|
128
142
|
)
|
|
@@ -208,7 +208,7 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
|
|
|
208
208
|
flag = True
|
|
209
209
|
while flag:
|
|
210
210
|
try:
|
|
211
|
-
response = session.
|
|
211
|
+
response = session.red_teams.operation_results(operation_id, headers=headers)
|
|
212
212
|
except Exception as e:
|
|
213
213
|
from types import SimpleNamespace # pylint: disable=forgotten-debug-statement
|
|
214
214
|
|
|
@@ -217,9 +217,10 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
|
|
|
217
217
|
response_data = response
|
|
218
218
|
flag = False
|
|
219
219
|
break
|
|
220
|
-
if response.
|
|
221
|
-
response_data =
|
|
220
|
+
if not isinstance(response, SimpleNamespace) and response.get("object") == "chat.completion":
|
|
221
|
+
response_data = response
|
|
222
222
|
flag = False
|
|
223
|
+
break
|
|
223
224
|
else:
|
|
224
225
|
request_count += 1
|
|
225
226
|
sleep_time = RAIService.SLEEP_TIME**request_count
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: azure-ai-evaluation
|
|
3
|
-
Version: 1.10.0
|
|
3
|
+
Version: 1.11.1
|
|
4
4
|
Summary: Microsoft Azure Evaluation Library for Python
|
|
5
5
|
Home-page: https://github.com/Azure/azure-sdk-for-python
|
|
6
6
|
Author: Microsoft Corporation
|
|
@@ -28,13 +28,28 @@ Requires-Dist: nltk>=3.9.1
|
|
|
28
28
|
Requires-Dist: azure-storage-blob>=12.10.0
|
|
29
29
|
Requires-Dist: httpx>=0.25.1
|
|
30
30
|
Requires-Dist: pandas<3.0.0,>=2.1.2
|
|
31
|
-
Requires-Dist: openai>=1.
|
|
31
|
+
Requires-Dist: openai>=1.108.0
|
|
32
32
|
Requires-Dist: ruamel.yaml<1.0.0,>=0.17.10
|
|
33
33
|
Requires-Dist: msrest>=0.6.21
|
|
34
34
|
Requires-Dist: Jinja2>=3.1.6
|
|
35
35
|
Requires-Dist: aiohttp>=3.0
|
|
36
36
|
Provides-Extra: redteam
|
|
37
37
|
Requires-Dist: pyrit==0.8.1; extra == "redteam"
|
|
38
|
+
Requires-Dist: duckdb==1.3.2; extra == "redteam"
|
|
39
|
+
Dynamic: author
|
|
40
|
+
Dynamic: author-email
|
|
41
|
+
Dynamic: classifier
|
|
42
|
+
Dynamic: description
|
|
43
|
+
Dynamic: description-content-type
|
|
44
|
+
Dynamic: home-page
|
|
45
|
+
Dynamic: keywords
|
|
46
|
+
Dynamic: license
|
|
47
|
+
Dynamic: license-file
|
|
48
|
+
Dynamic: project-url
|
|
49
|
+
Dynamic: provides-extra
|
|
50
|
+
Dynamic: requires-dist
|
|
51
|
+
Dynamic: requires-python
|
|
52
|
+
Dynamic: summary
|
|
38
53
|
|
|
39
54
|
# Azure AI Evaluation client library for Python
|
|
40
55
|
|
|
@@ -398,6 +413,27 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con
|
|
|
398
413
|
|
|
399
414
|
# Release History
|
|
400
415
|
|
|
416
|
+
## 1.11.1 (2025-09-17)
|
|
417
|
+
|
|
418
|
+
### Bugs Fixed
|
|
419
|
+
- Pinning duckdb version to 1.3.2 for redteam extra to fix error `TypeError: unhashable type: '_duckdb.typing.DuckDBPyType'`
|
|
420
|
+
|
|
421
|
+
## 1.11.0 (2025-09-02)
|
|
422
|
+
|
|
423
|
+
### Features Added
|
|
424
|
+
- Added support for user-supplied tags in the `evaluate` function. Tags are key-value pairs that can be used for experiment tracking, A/B testing, filtering, and organizing evaluation runs. The function accepts a `tags` parameter.
|
|
425
|
+
- Added support for user-supplied TokenCredentials with LLM based evaluators.
|
|
426
|
+
- Enhanced `GroundednessEvaluator` to support AI agent evaluation with tool calls. The evaluator now accepts agent response data containing tool calls and can extract context from `file_search` tool results for groundedness assessment. This enables evaluation of AI agents that use tools to retrieve information and generate responses. Note: Agent groundedness evaluation is currently supported only when the `file_search` tool is used.
|
|
427
|
+
- Added `language` parameter to `RedTeam` class for multilingual red team scanning support. The parameter accepts values from `SupportedLanguages` enum including English, Spanish, French, German, Italian, Portuguese, Japanese, Korean, and Simplified Chinese, enabling red team attacks to be generated and conducted in multiple languages.
|
|
428
|
+
- Added support for IndirectAttack and UngroundedAttributes risk categories in `RedTeam` scanning. These new risk categories expand red team capabilities to detect cross-platform indirect attacks and evaluate ungrounded inferences about human attributes including emotional state and protected class information.
|
|
429
|
+
|
|
430
|
+
### Bugs Fixed
|
|
431
|
+
- Fixed issue where evaluation results were not properly aligned with input data, leading to incorrect metrics being reported.
|
|
432
|
+
|
|
433
|
+
### Other Changes
|
|
434
|
+
- Deprecating `AdversarialSimulator` in favor of the [AI Red Teaming Agent](https://aka.ms/airedteamingagent-sample). `AdversarialSimulator` will be removed in the next minor release.
|
|
435
|
+
- Moved retry configuration constants (`MAX_RETRY_ATTEMPTS`, `MAX_RETRY_WAIT_SECONDS`, `MIN_RETRY_WAIT_SECONDS`) from `RedTeam` class to new `RetryManager` class for better code organization and configurability.
|
|
436
|
+
|
|
401
437
|
## 1.10.0 (2025-07-31)
|
|
402
438
|
|
|
403
439
|
### Breaking Changes
|