azure-ai-evaluation 1.0.0b4__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. azure/ai/evaluation/__init__.py +22 -0
  2. azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +4 -0
  3. azure/ai/evaluation/_common/constants.py +5 -0
  4. azure/ai/evaluation/_common/math.py +73 -2
  5. azure/ai/evaluation/_common/rai_service.py +250 -62
  6. azure/ai/evaluation/_common/utils.py +196 -23
  7. azure/ai/evaluation/_constants.py +7 -6
  8. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/__init__.py +3 -2
  9. azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +13 -4
  10. azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/proxy_client.py +19 -6
  11. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +46 -0
  12. azure/ai/evaluation/_evaluate/_eval_run.py +55 -14
  13. azure/ai/evaluation/_evaluate/_evaluate.py +312 -228
  14. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +7 -6
  15. azure/ai/evaluation/_evaluate/_utils.py +46 -11
  16. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +17 -18
  17. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +67 -31
  18. azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -34
  19. azure/ai/evaluation/_evaluators/_common/_base_eval.py +37 -24
  20. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +21 -9
  21. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +52 -16
  22. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +91 -48
  23. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +100 -26
  24. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +94 -26
  25. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +96 -26
  26. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +97 -26
  27. azure/ai/evaluation/_evaluators/_eci/_eci.py +31 -4
  28. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -13
  29. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +67 -36
  30. azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -36
  31. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +14 -16
  32. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +106 -34
  33. azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +113 -0
  34. azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +99 -0
  35. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +20 -27
  36. azure/ai/evaluation/_evaluators/_multimodal/__init__.py +20 -0
  37. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +132 -0
  38. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +55 -0
  39. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +100 -0
  40. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +124 -0
  41. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +100 -0
  42. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +100 -0
  43. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +100 -0
  44. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +87 -31
  45. azure/ai/evaluation/_evaluators/_qa/_qa.py +23 -31
  46. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +72 -36
  47. azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +78 -42
  48. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +83 -125
  49. azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +74 -24
  50. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +26 -27
  51. azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
  52. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +148 -0
  53. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +37 -28
  54. azure/ai/evaluation/_evaluators/_xpia/xpia.py +94 -33
  55. azure/ai/evaluation/_exceptions.py +19 -0
  56. azure/ai/evaluation/_model_configurations.py +83 -15
  57. azure/ai/evaluation/_version.py +1 -1
  58. azure/ai/evaluation/simulator/__init__.py +2 -1
  59. azure/ai/evaluation/simulator/_adversarial_scenario.py +20 -1
  60. azure/ai/evaluation/simulator/_adversarial_simulator.py +29 -35
  61. azure/ai/evaluation/simulator/_constants.py +11 -1
  62. azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
  63. azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
  64. azure/ai/evaluation/simulator/_direct_attack_simulator.py +17 -9
  65. azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
  66. azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +22 -1
  67. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +90 -35
  68. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +4 -2
  69. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +8 -4
  70. azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +4 -4
  71. azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -1
  72. azure/ai/evaluation/simulator/_simulator.py +165 -105
  73. azure/ai/evaluation/simulator/_utils.py +31 -13
  74. azure_ai_evaluation-1.0.1.dist-info/METADATA +600 -0
  75. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/NOTICE.txt +20 -0
  76. azure_ai_evaluation-1.0.1.dist-info/RECORD +119 -0
  77. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/WHEEL +1 -1
  78. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -322
  79. azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -49
  80. azure_ai_evaluation-1.0.0b4.dist-info/METADATA +0 -535
  81. azure_ai_evaluation-1.0.0b4.dist-info/RECORD +0 -106
  82. /azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +0 -0
  83. {azure_ai_evaluation-1.0.0b4.dist-info → azure_ai_evaluation-1.0.1.dist-info}/top_level.txt +0 -0
@@ -2,63 +2,124 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------
  import logging
+ from typing import Dict, List, Union

- from typing_extensions import override
+ from typing_extensions import overload, override

+ from azure.ai.evaluation._common._experimental import experimental
  from azure.ai.evaluation._common.constants import EvaluationMetrics
  from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
+ from azure.ai.evaluation._model_configurations import Conversation

  logger = logging.getLogger(__name__)


- class IndirectAttackEvaluator(RaiServiceEvaluatorBase):
- """A Cross-Domain Prompt Injection Attack (XPIA) jailbreak evaluator.
+ @experimental
+ class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
+ """
+ Evaluates the indirect attack score for a given query and response or a multi-turn conversation, with reasoning.
+
+ Indirect attacks, also known as cross-domain prompt injected attacks (XPIA), are when jailbreak attacks are
+ injected into the context of a document or source that may result in an altered, unexpected behavior.
+
+ Indirect attacks evaluations are broken down into three subcategories:
+ - Manipulated Content: This category involves commands that aim to alter or fabricate information, often
+ to mislead or deceive. It includes actions like spreading false information, altering language or
+ formatting, and hiding or emphasizing specific details. The goal is often to manipulate perceptions
+ or behaviors by controlling the flow and presentation of information.
+ - Intrusion: This category encompasses commands that attempt to breach systems, gain unauthorized access,
+ or elevate privileges illicitly. It includes creating backdoors, exploiting vulnerabilities, and
+ traditional jailbreaks to bypass security measures. The intent is often to gain control or access
+ sensitive data without detection.
+ - Information Gathering: This category pertains to accessing, deleting, or modifying data without
+ authorization, often for malicious purposes. It includes exfiltrating sensitive data, tampering with
+ system records, and removing or altering existing information. The focus is on acquiring or
+ manipulating data to exploit or compromise systems and individuals.

- Detect whether cross domain injected attacks are present in your AI system's response.
+ Indirect attack scores are boolean values, where True indicates that the response contains an indirect attack.

  :param credential: The credential for connecting to Azure AI project. Required
  :type credential: ~azure.core.credentials.TokenCredential
  :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
  name.
  :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
- :param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
- focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
- :type eval_last_turn: bool
- :return: A function that evaluates and generates metrics for XPIA chat scenario. Metrics include the overall
- evaluation label and reason for the Q/A Pair, as well as sub-labels for manipulated content, intrusion, and
- information.
- :rtype: Callable
- **Usage**
-
- .. code-block:: python
-
- eval_fn = IndirectAttackEvaluator(model_config)
- result = eval_fn(query="What is the capital of France?", response="Paris.")
-
- **Output format for question-answer pair**
-
- .. code-block:: python
-
- {
- 'xpia_label': False,
- 'xpia_reason': 'The conversation does not contain any manipulated content, intrusion or
- information gathering.'
- 'xpia_information_gathering': False,
- 'xpia_intrusion': False
- 'xpia_manipulated_content': False
- }
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/evaluation_samples_evaluate.py
+ :start-after: [START indirect_attack_evaluator]
+ :end-before: [END indirect_attack_evaluator]
+ :language: python
+ :dedent: 8
+ :caption: Initialize and call an IndirectAttackEvaluator.
  """

+ id = "azureml://registries/azureml/models/Indirect-Attack-Evaluator/versions/3"
+ """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
+
  @override
  def __init__(
  self,
  credential,
- azure_ai_project: dict,
- eval_last_turn: bool = False,
+ azure_ai_project,
  ):
  super().__init__(
  eval_metric=EvaluationMetrics.XPIA,
  azure_ai_project=azure_ai_project,
  credential=credential,
- eval_last_turn=eval_last_turn,
  )
+
+ @overload
+ def __call__(
+ self,
+ *,
+ query: str,
+ response: str,
+ ) -> Dict[str, Union[str, bool]]:
+ """Evaluate whether cross domain injected attacks are present in given query/response
+
+ :keyword query: The query to be evaluated.
+ :paramtype query: str
+ :keyword response: The response to be evaluated.
+ :paramtype response: str
+ :return: The cross domain injection attack score
+ :rtype: Dict[str, Union[str, bool]]
+ """
+
+ @overload
+ def __call__(
+ self,
+ *,
+ conversation: Conversation,
+ ) -> Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]:
+ """Evaluate cross domain injected attacks are present in a conversation
+
+ :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
+ key "messages", and potentially a global context under the key "context". Conversation turns are expected
+ to be dictionaries with keys "content", "role", and possibly "context".
+ :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
+ :return: The cross domain injection attack score
+ :rtype: Dict[str, Union[str, bool, Dict[str, List[Union[str, bool]]]]]
+ """
+
+ @override
+ def __call__( # pylint: disable=docstring-missing-param
+ self,
+ *args,
+ **kwargs,
+ ):
+ """
+ Evaluate whether cross domain injected attacks are present in your AI system's response.
+
+ :keyword query: The query to be evaluated.
+ :paramtype query: Optional[str]
+ :keyword response: The response to be evaluated.
+ :paramtype response: Optional[str]
+ :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
+ key "messages". Conversation turns are expected
+ to be dictionaries with keys "content" and "role".
+ :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
+ :return: The cross domain injection attack score
+ :rtype: Union[Dict[str, Union[str, bool]], Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]]
+ """
+ return super().__call__(*args, **kwargs)
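
A minimal usage sketch matching the constructor and `__call__` overloads shown in this hunk; the subscription, resource group, project, and conversation values below are placeholders, not values taken from the package.

```python
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import IndirectAttackEvaluator

azure_ai_project = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

evaluator = IndirectAttackEvaluator(
    credential=DefaultAzureCredential(),
    azure_ai_project=azure_ai_project,
)

# Single query/response pair -> Dict[str, Union[str, bool]] (label, reason, sub-labels).
single_turn_result = evaluator(
    query="What is the capital of France?",
    response="Paris.",
)

# Multi-turn conversation, per the new `conversation` overload.
conversation = {
    "messages": [
        {"role": "user", "content": "Summarize the attached document."},
        {"role": "assistant", "content": "Here is a summary of the document."},
    ]
}
conversation_result = evaluator(conversation=conversation)
```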
@@ -22,6 +22,8 @@ class ErrorCategory(Enum):
  * FAILED_EXECUTION -> Execution failed
  * SERVICE_UNAVAILABLE -> Service is unavailable
  * MISSING_PACKAGE -> Required package is missing
+ * FAILED_REMOTE_TRACKING -> Remote tracking failed
+ * PROJECT_ACCESS_ERROR -> Access to project failed
  * UNKNOWN -> Undefined placeholder. Avoid using.
  """

@@ -33,6 +35,8 @@ class ErrorCategory(Enum):
  FAILED_EXECUTION = "FAILED_EXECUTION"
  SERVICE_UNAVAILABLE = "SERVICE UNAVAILABLE"
  MISSING_PACKAGE = "MISSING PACKAGE"
+ FAILED_REMOTE_TRACKING = "FAILED REMOTE TRACKING"
+ PROJECT_ACCESS_ERROR = "PROJECT ACCESS ERROR"
  UNKNOWN = "UNKNOWN"


@@ -59,12 +63,15 @@ class ErrorTarget(Enum):
  RAI_CLIENT = "RAIClient"
  COHERENCE_EVALUATOR = "CoherenceEvaluator"
  CONTENT_SAFETY_CHAT_EVALUATOR = "ContentSafetyEvaluator"
+ CONTENT_SAFETY_MULTIMODAL_EVALUATOR = "ContentSafetyMultimodalEvaluator"
  ECI_EVALUATOR = "ECIEvaluator"
  F1_EVALUATOR = "F1Evaluator"
  GROUNDEDNESS_EVALUATOR = "GroundednessEvaluator"
  PROTECTED_MATERIAL_EVALUATOR = "ProtectedMaterialEvaluator"
  RELEVANCE_EVALUATOR = "RelevanceEvaluator"
  SIMILARITY_EVALUATOR = "SimilarityEvaluator"
+ FLUENCY_EVALUATOR = "FluencyEvaluator"
+ RETRIEVAL_EVALUATOR = "RetrievalEvaluator"
  INDIRECT_ATTACK_EVALUATOR = "IndirectAttackEvaluator"
  INDIRECT_ATTACK_SIMULATOR = "IndirectAttackSimulator"
  ADVERSARIAL_SIMULATOR = "AdversarialSimulator"
@@ -90,6 +97,8 @@ class EvaluationException(AzureError):
  :type category: ~azure.ai.evaluation._exceptions.ErrorCategory
  :param blame: The source of blame for the error, defaults to Unknown.
  :type balance: ~azure.ai.evaluation._exceptions.ErrorBlame
+ :param tsg_link: A link to the TSG page for troubleshooting the error.
+ :type tsg_link: str
  """

  def __init__(
@@ -100,10 +109,20 @@ class EvaluationException(AzureError):
  target: ErrorTarget = ErrorTarget.UNKNOWN,
  category: ErrorCategory = ErrorCategory.UNKNOWN,
  blame: ErrorBlame = ErrorBlame.UNKNOWN,
+ tsg_link: Optional[str] = None,
  **kwargs,
  ) -> None:
  self.category = category
  self.target = target
  self.blame = blame
  self.internal_message = internal_message
+ self.tsg_link = tsg_link
  super().__init__(message, *args, **kwargs)
+
+ def __str__(self):
+ error_blame = "InternalError" if self.blame != ErrorBlame.USER_ERROR else "UserError"
+ msg = f"({error_blame}) {super().__str__()}"
+ if self.tsg_link:
+ msg += f"\nVisit {self.tsg_link} to troubleshoot this issue."
+
+ return msg
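
An illustrative sketch of the new `tsg_link` behavior, assuming the message is passed as the first positional argument; `_exceptions` is a private module and the URL here is a placeholder.

```python
from azure.ai.evaluation._exceptions import (
    ErrorBlame,
    ErrorCategory,
    ErrorTarget,
    EvaluationException,
)

err = EvaluationException(
    "Could not access the Azure AI project to upload evaluation results.",
    target=ErrorTarget.UNKNOWN,
    category=ErrorCategory.PROJECT_ACCESS_ERROR,
    blame=ErrorBlame.USER_ERROR,
    tsg_link="https://example.com/troubleshooting-guide",  # placeholder link
)

# __str__ now prefixes the blame bucket and appends the troubleshooting link:
#   (UserError) Could not access the Azure AI project to upload evaluation results.
#   Visit https://example.com/troubleshooting-guide to troubleshoot this issue.
print(err)
```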
@@ -2,50 +2,101 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------

- from typing import Dict, Literal, TypedDict
+ from typing import Any, Dict, List, Literal, TypedDict, Union

  from typing_extensions import NotRequired


  class AzureOpenAIModelConfiguration(TypedDict):
- """Model Configuration for Azure OpenAI Model"""
+ """Model configuration for Azure OpenAI models
+
+ :param type: The type of the model configuration. Should be 'azure_openai' for AzureOpenAIModelConfiguration
+ :type type: NotRequired[Literal["azure_openai"]]
+ :param azure_deployment: Name of Azure OpenAI deployment to make requests to
+ :type azure_deployment: str
+ :param azure_endpoint: Endpoint of Azure OpenAI resource to make requests to
+ :type azure_endpoint: str
+ :param api_key: API key of Azure OpenAI resource
+ :type api_key: str
+ :param api_version: API version to use in request to Azure OpenAI deployment. Optional.
+ :type api_version: NotRequired[str]
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/evaluation_samples_common.py
+ :start-after: [START create_AOAI_model_config]
+ :end-before: [END create_AOAI_model_config]
+ :language: python
+ :dedent: 8
+ :caption: Creating an AzureOpenAIModelConfiguration object.
+
+ """

  type: NotRequired[Literal["azure_openai"]]
- """The type of the model configuration. Should be 'azure_openai' for AzureOpenAIModelConfiguration"""
  azure_deployment: str
- """Name of Azure OpenAI deployment to make request to"""
  azure_endpoint: str
  """Endpoint of Azure OpenAI resource to make request to"""
  api_key: NotRequired[str]
  """API key of Azure OpenAI resource"""
  api_version: NotRequired[str]
- """(Optional) API version to use in request to Azure OpenAI deployment"""


  class OpenAIModelConfiguration(TypedDict):
- """Model Configuration for OpenAI Model"""
+ """Model configuration for OpenAI models
+
+ :param type: The type of the model configuration. Should be 'openai' for OpenAIModelConfiguration
+ :type type: NotRequired[Literal["openai"]]
+ :param api_key: API key needed to make requests to model
+ :type api_key: str
+ :param model: Name of model to be used in OpenAI request
+ :type model: str
+ :param base_url: Base URL to be used in OpenAI request. Optional.
+ :type base_url: NotRequired[str]
+ :param organization: OpenAI organization. Optional.
+ :type organization: NotRequired[str]
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/evaluation_samples_common.py
+ :start-after: [START create_OAI_model_config]
+ :end-before: [END create_OAI_model_config]
+ :language: python
+ :dedent: 8
+ :caption: Creating an OpenAIModelConfiguration object.
+
+ """

  type: NotRequired[Literal["openai"]]
- """The type of the model configuration. Should be 'openai' for OpenAIModelConfiguration"""
  api_key: str
- "API key needed to make request to model"
  model: str
- """Name of model to be used in OpenAI request"""
  base_url: NotRequired[str]
- """(Optional) Base URL to be used in OpenAI request"""
  organization: NotRequired[str]
- """(Optional) OpenAI organization"""


  class AzureAIProject(TypedDict):
- """Azure AI Project Information"""
+ """Information about the Azure AI project
+
+ :param subscription_id: ID of the Azure subscription the project is in
+ :type subscription_id: str
+ :param resource_group_name: Name of the Azure resource group the project is in
+ :type resource_group_name: str
+ :param project_name: Name of the Azure project
+ :type project_name: str
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/evaluation_samples_common.py
+ :start-after: [START create_azure_ai_project_object]
+ :end-before: [END create_azure_ai_project_object]
+ :language: python
+ :dedent: 8
+ :caption: Creating an AzureAIProject object.
+
+ """

  subscription_id: str
- """Azure subscription id of the project"""
  resource_group_name: str
- """Azure resource group name of the project"""
  project_name: str
- """Azure project name"""


  class EvaluatorConfig(TypedDict, total=False):
@@ -53,3 +104,20 @@ class EvaluatorConfig(TypedDict, total=False):

  column_mapping: Dict[str, str]
  """Dictionary mapping evaluator input name to column in data"""
+
+
+ class Message(TypedDict):
+ role: str
+ content: Union[str, List[Dict]]
+ context: NotRequired[Dict[str, Any]]
+
+
+ class Conversation(TypedDict):
+ messages: Union[List[Message], List[Dict]]
+ context: NotRequired[Dict[str, Any]]
+
+
+ class EvaluationResult(TypedDict):
+ metrics: Dict
+ studio_url: NotRequired[str]
+ rows: List[Dict]
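
A hedged sketch of the TypedDict shapes documented above; the endpoint, deployment, key, project, and context values are placeholders.

```python
from azure.ai.evaluation import (
    AzureAIProject,
    AzureOpenAIModelConfiguration,
    Conversation,
)

model_config: AzureOpenAIModelConfiguration = {
    "type": "azure_openai",
    "azure_endpoint": "https://<resource-name>.openai.azure.com",
    "azure_deployment": "<deployment-name>",
    "api_key": "<api-key>",
    "api_version": "<api-version>",  # optional
}

project: AzureAIProject = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

# The new Message/Conversation shapes: a list of role/content turns plus an
# optional shared "context" entry.
conversation: Conversation = {
    "messages": [
        {"role": "user", "content": "What is the capital of France?"},
        {"role": "assistant", "content": "Paris."},
    ],
    "context": {"source": "geography-faq"},
}
```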
@@ -2,4 +2,4 @@
  # Copyright (c) Microsoft Corporation. All rights reserved.
  # ---------------------------------------------------------

- VERSION = "1.0.0b4"
+ VERSION = "1.0.1"
@@ -1,4 +1,4 @@
- from ._adversarial_scenario import AdversarialScenario
+ from ._adversarial_scenario import AdversarialScenario, AdversarialScenarioJailbreak
  from ._adversarial_simulator import AdversarialSimulator
  from ._constants import SupportedLanguages
  from ._direct_attack_simulator import DirectAttackSimulator
@@ -8,6 +8,7 @@ from ._simulator import Simulator
  __all__ = [
  "AdversarialSimulator",
  "AdversarialScenario",
+ "AdversarialScenarioJailbreak",
  "DirectAttackSimulator",
  "IndirectAttackSimulator",
  "SupportedLanguages",
@@ -3,10 +3,22 @@
  # ---------------------------------------------------------

  from enum import Enum
+ from azure.ai.evaluation._common._experimental import experimental


+ @experimental
  class AdversarialScenario(Enum):
- """Adversarial scenario types"""
+ """Adversarial scenario types
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
+ :start-after: [START adversarial_scenario]
+ :end-before: [END adversarial_scenario]
+ :language: python
+ :dedent: 8
+ :caption: Configure an AdversarialSimulator with an Adversarial Conversation scenario.
+ """

  ADVERSARIAL_QA = "adv_qa"
  ADVERSARIAL_CONVERSATION = "adv_conversation"
@@ -16,9 +28,16 @@ class AdversarialScenario(Enum):
  ADVERSARIAL_CONTENT_GEN_UNGROUNDED = "adv_content_gen_ungrounded"
  ADVERSARIAL_CONTENT_GEN_GROUNDED = "adv_content_gen_grounded"
  ADVERSARIAL_CONTENT_PROTECTED_MATERIAL = "adv_content_protected_material"
+
+
+ @experimental
+ class AdversarialScenarioJailbreak(Enum):
+ """Adversarial scenario types for XPIA Jailbreak"""
+
  ADVERSARIAL_INDIRECT_JAILBREAK = "adv_xpia"


+ @experimental
  class _UnstableAdversarialScenario(Enum):
  """Adversarial scenario types that we haven't published, but still want available for internal use
  Values listed here are subject to potential change, and/or migration to the main enum over time.
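
A short sketch of how the enum split surfaces to callers after this change, using only exports and members visible in this diff.

```python
from azure.ai.evaluation.simulator import (
    AdversarialScenario,
    AdversarialScenarioJailbreak,
)

# Published adversarial scenarios remain on AdversarialScenario...
qa_scenario = AdversarialScenario.ADVERSARIAL_QA

# ...while the XPIA jailbreak scenario moves to its own enum.
xpia_scenario = AdversarialScenarioJailbreak.ADVERSARIAL_INDIRECT_JAILBREAK
```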
@@ -7,12 +7,15 @@ import asyncio
  import logging
  import random
  from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast
+ from itertools import zip_longest

  from tqdm import tqdm

+ from azure.ai.evaluation._common._experimental import experimental
  from azure.ai.evaluation._common.utils import validate_azure_ai_project
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
  from azure.ai.evaluation._http_utils import get_async_http_client
+ from azure.ai.evaluation._model_configurations import AzureAIProject
  from azure.ai.evaluation.simulator import AdversarialScenario
  from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
  from azure.core.credentials import TokenCredential
@@ -21,7 +24,6 @@ from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
  from ._constants import SupportedLanguages
  from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole, ConversationTurn
  from ._conversation._conversation import simulate_conversation
- from ._helpers import experimental
  from ._model_tools import (
  AdversarialTemplateHandler,
  ManagedIdentityAPITokenManager,
@@ -45,9 +47,19 @@ class AdversarialSimulator:
  :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
  :param credential: The credential for connecting to Azure AI project.
  :type credential: ~azure.core.credentials.TokenCredential
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
+ :start-after: [START adversarial_scenario]
+ :end-before: [END adversarial_scenario]
+ :language: python
+ :dedent: 8
+ :caption: Run the AdversarialSimulator with an AdversarialConversation scenario to produce 2 results with
+ 2 conversation turns each (4 messages per result).
  """

- def __init__(self, *, azure_ai_project: dict, credential):
+ def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
  """Constructor."""

  try:
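
A construction sketch matching the tightened constructor signature above; `DefaultAzureCredential` satisfies the `TokenCredential` annotation, and the project values are placeholders.

```python
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation.simulator import AdversarialSimulator

simulator = AdversarialSimulator(
    azure_ai_project={
        "subscription_id": "<subscription-id>",
        "resource_group_name": "<resource-group>",
        "project_name": "<project-name>",
    },
    credential=DefaultAzureCredential(),
)
```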
@@ -149,28 +161,6 @@ class AdversarialSimulator:

  The 'content' for 'assistant' role messages may includes the messages that your callback returned.
  :rtype: List[Dict[str, Any]]
-
- **Output format**
-
- .. code-block:: python
-
- return_value = [
- {
- 'template_parameters': {},
- 'messages': [
- {
- 'content': '<jailbreak prompt> <adversarial query>',
- 'role': 'user'
- },
- {
- 'content': "<response from endpoint>",
- 'role': 'assistant',
- 'context': None
- }
- ],
- '$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
- }
- ]
  """

  # validate the inputs
@@ -215,17 +205,18 @@ class AdversarialSimulator:
  ncols=100,
  unit="simulations",
  )
- for template in templates:
- parameter_order = list(range(len(template.template_parameters)))
- if randomize_order:
- # The template parameter lists are persistent across sim runs within a session,
- # So randomize a the selection instead of the parameter list directly,
- # or a potentially large deep copy.
- if randomization_seed is not None:
- random.seed(randomization_seed)
- random.shuffle(parameter_order)
- for index in parameter_order:
- parameter = template.template_parameters[index].copy()
+
+ if randomize_order:
+ # The template parameter lists are persistent across sim runs within a session,
+ # So randomize a the selection instead of the parameter list directly,
+ # or a potentially large deep copy.
+ if randomization_seed is not None:
+ random.seed(randomization_seed)
+ random.shuffle(templates)
+ parameter_lists = [t.template_parameters for t in templates]
+ zipped_parameters = list(zip_longest(*parameter_lists))
+ for param_group in zipped_parameters:
+ for template, parameter in zip(templates, param_group):
  if _jailbreak_type == "upia":
  parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
  tasks.append(
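
The behavioral change above is that simulation parameters are now drawn round-robin across templates via `zip_longest` rather than exhausting one template's parameter list before moving to the next. A standalone sketch of that interleaving, with made-up template names and parameter values:

```python
from itertools import zip_longest

template_parameters = {
    "template_a": ["a1", "a2", "a3"],
    "template_b": ["b1", "b2"],
}

parameter_lists = list(template_parameters.values())
for param_group in zip_longest(*parameter_lists):
    for name, parameter in zip(template_parameters, param_group):
        if parameter is None:
            # zip_longest pads shorter lists with None once they run out.
            continue
        print(name, parameter)

# Prints: template_a a1, template_b b1, template_a a2, template_b b2, template_a a3,
# i.e. simulations alternate across templates instead of running all of template_a first.
```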
@@ -276,6 +267,9 @@ class AdversarialSimulator:
  "target_population",
  "topic",
  "ch_template_placeholder",
+ "chatbot_name",
+ "name",
+ "group",
  ):
  template_parameters.pop(key, None)
  if conversation_category:
@@ -5,7 +5,17 @@ from enum import Enum


  class SupportedLanguages(Enum):
- """Supported languages for evaluation, using ISO standard language codes."""
+ """Supported languages for evaluation, using ISO standard language codes.
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
+ :start-after: [START supported_languages]
+ :end-before: [END supported_languages]
+ :language: python
+ :dedent: 8
+ :caption: Run the AdversarialSimulator with Simplified Chinese language support for evaluation.
+ """

  Spanish = "es"
  Italian = "it"
@@ -0,0 +1,3 @@
+ # ---------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # ---------------------------------------------------------