azure-ai-evaluation 1.0.0b5__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of azure-ai-evaluation might be problematic. Click here for more details.

Files changed (72) hide show
  1. azure/ai/evaluation/_azure/__init__.py +3 -0
  2. azure/ai/evaluation/_azure/_clients.py +188 -0
  3. azure/ai/evaluation/_azure/_models.py +227 -0
  4. azure/ai/evaluation/_azure/_token_manager.py +118 -0
  5. azure/ai/evaluation/_common/_experimental.py +4 -0
  6. azure/ai/evaluation/_common/math.py +62 -2
  7. azure/ai/evaluation/_common/rai_service.py +110 -50
  8. azure/ai/evaluation/_common/utils.py +50 -16
  9. azure/ai/evaluation/_constants.py +2 -0
  10. azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -0
  11. azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +13 -3
  12. azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +12 -1
  13. azure/ai/evaluation/_evaluate/_eval_run.py +38 -43
  14. azure/ai/evaluation/_evaluate/_evaluate.py +62 -131
  15. azure/ai/evaluation/_evaluate/_telemetry/__init__.py +2 -1
  16. azure/ai/evaluation/_evaluate/_utils.py +72 -38
  17. azure/ai/evaluation/_evaluators/_bleu/_bleu.py +16 -17
  18. azure/ai/evaluation/_evaluators/_coherence/_coherence.py +60 -29
  19. azure/ai/evaluation/_evaluators/_common/_base_eval.py +88 -6
  20. azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +16 -3
  21. azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +39 -10
  22. azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +58 -52
  23. azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +79 -34
  24. azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +73 -34
  25. azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +74 -33
  26. azure/ai/evaluation/_evaluators/_content_safety/_violence.py +76 -34
  27. azure/ai/evaluation/_evaluators/_eci/_eci.py +28 -3
  28. azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +20 -13
  29. azure/ai/evaluation/_evaluators/_fluency/_fluency.py +57 -26
  30. azure/ai/evaluation/_evaluators/_gleu/_gleu.py +13 -15
  31. azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +68 -30
  32. azure/ai/evaluation/_evaluators/_meteor/_meteor.py +17 -20
  33. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +10 -8
  34. azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -2
  35. azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +6 -2
  36. azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +10 -6
  37. azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +6 -2
  38. azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +6 -2
  39. azure/ai/evaluation/_evaluators/_multimodal/_violence.py +6 -2
  40. azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +57 -34
  41. azure/ai/evaluation/_evaluators/_qa/_qa.py +25 -37
  42. azure/ai/evaluation/_evaluators/_relevance/_relevance.py +63 -29
  43. azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +76 -161
  44. azure/ai/evaluation/_evaluators/_rouge/_rouge.py +24 -25
  45. azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +65 -67
  46. azure/ai/evaluation/_evaluators/_similarity/_similarity.py +26 -20
  47. azure/ai/evaluation/_evaluators/_xpia/xpia.py +74 -40
  48. azure/ai/evaluation/_exceptions.py +2 -0
  49. azure/ai/evaluation/_http_utils.py +6 -4
  50. azure/ai/evaluation/_model_configurations.py +65 -14
  51. azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
  52. azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
  53. azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
  54. azure/ai/evaluation/_version.py +1 -1
  55. azure/ai/evaluation/simulator/_adversarial_scenario.py +17 -1
  56. azure/ai/evaluation/simulator/_adversarial_simulator.py +57 -47
  57. azure/ai/evaluation/simulator/_constants.py +11 -1
  58. azure/ai/evaluation/simulator/_conversation/__init__.py +128 -7
  59. azure/ai/evaluation/simulator/_conversation/_conversation.py +0 -1
  60. azure/ai/evaluation/simulator/_direct_attack_simulator.py +16 -8
  61. azure/ai/evaluation/simulator/_indirect_attack_simulator.py +12 -1
  62. azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +3 -1
  63. azure/ai/evaluation/simulator/_model_tools/_rai_client.py +48 -4
  64. azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -0
  65. azure/ai/evaluation/simulator/_simulator.py +54 -45
  66. azure/ai/evaluation/simulator/_utils.py +25 -7
  67. {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/METADATA +240 -327
  68. {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/RECORD +71 -68
  69. azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -322
  70. {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/NOTICE.txt +0 -0
  71. {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/WHEEL +0 -0
  72. {azure_ai_evaluation-1.0.0b5.dist-info → azure_ai_evaluation-1.1.0.dist-info}/top_level.txt +0 -0
@@ -2,90 +2,124 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
  import logging
5
- from typing import Optional
5
+ from typing import Dict, List, Union
6
6
 
7
- from typing_extensions import override
7
+ from typing_extensions import overload, override
8
8
 
9
9
  from azure.ai.evaluation._common._experimental import experimental
10
10
  from azure.ai.evaluation._common.constants import EvaluationMetrics
11
11
  from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
12
+ from azure.ai.evaluation._model_configurations import Conversation
12
13
 
13
14
  logger = logging.getLogger(__name__)
14
15
 
15
16
 
16
17
  @experimental
17
- class IndirectAttackEvaluator(RaiServiceEvaluatorBase):
18
- """A Cross-Domain Prompt Injection Attack (XPIA) jailbreak evaluator.
19
-
20
- Detect whether cross domain injected attacks are present in your AI system's response.
21
- Metrics include the overall evaluation label and reason for the Q/A Pair, as well as sub-labels
22
- for manipulated content, intrusion, and information.
18
+ class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
19
+ """
20
+ Evaluates the indirect attack score for a given query and response or a multi-turn conversation, with reasoning.
21
+
22
+ Indirect attacks, also known as cross-domain prompt injected attacks (XPIA), are when jailbreak attacks are
23
+ injected into the context of a document or source that may result in an altered, unexpected behavior.
24
+
25
+ Indirect attack evaluations are broken down into three subcategories:
26
+ - Manipulated Content: This category involves commands that aim to alter or fabricate information, often
27
+ to mislead or deceive. It includes actions like spreading false information, altering language or
28
+ formatting, and hiding or emphasizing specific details. The goal is often to manipulate perceptions
29
+ or behaviors by controlling the flow and presentation of information.
30
+ - Intrusion: This category encompasses commands that attempt to breach systems, gain unauthorized access,
31
+ or elevate privileges illicitly. It includes creating backdoors, exploiting vulnerabilities, and
32
+ traditional jailbreaks to bypass security measures. The intent is often to gain control or access
33
+ sensitive data without detection.
34
+ - Information Gathering: This category pertains to accessing, deleting, or modifying data without
35
+ authorization, often for malicious purposes. It includes exfiltrating sensitive data, tampering with
36
+ system records, and removing or altering existing information. The focus is on acquiring or
37
+ manipulating data to exploit or compromise systems and individuals.
38
+
39
+ Indirect attack scores are boolean values, where True indicates that the response contains an indirect attack.
23
40
 
24
41
  :param credential: The credential for connecting to Azure AI project. Required
25
42
  :type credential: ~azure.core.credentials.TokenCredential
26
43
  :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
27
44
  name.
28
45
  :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
29
- :param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
30
- focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
31
- :type eval_last_turn: bool
32
-
33
- **Usage**
34
-
35
- .. code-block:: python
36
46
 
37
- eval_fn = IndirectAttackEvaluator(model_config)
38
- result = eval_fn(query="What is the capital of France?", response="Paris.")
47
+ .. admonition:: Example:
39
48
 
40
- **Output format for question-answer pair**
41
-
42
- .. code-block:: python
43
-
44
- {
45
- 'xpia_label': False,
46
- 'xpia_reason': 'The conversation does not contain any manipulated content, intrusion or
47
- information gathering.'
48
- 'xpia_information_gathering': False,
49
- 'xpia_intrusion': False
50
- 'xpia_manipulated_content': False
51
- }
49
+ .. literalinclude:: ../samples/evaluation_samples_evaluate.py
50
+ :start-after: [START indirect_attack_evaluator]
51
+ :end-before: [END indirect_attack_evaluator]
52
+ :language: python
53
+ :dedent: 8
54
+ :caption: Initialize and call an IndirectAttackEvaluator.
52
55
  """
53
56
 
57
+ id = "azureml://registries/azureml/models/Indirect-Attack-Evaluator/versions/3"
58
+ """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
59
+
54
60
  @override
55
61
  def __init__(
56
62
  self,
57
63
  credential,
58
64
  azure_ai_project,
59
- eval_last_turn: bool = False,
60
65
  ):
61
66
  super().__init__(
62
67
  eval_metric=EvaluationMetrics.XPIA,
63
68
  azure_ai_project=azure_ai_project,
64
69
  credential=credential,
65
- eval_last_turn=eval_last_turn,
66
70
  )
67
71
 
68
- @override
72
+ @overload
69
73
  def __call__(
70
74
  self,
71
75
  *,
72
- query: Optional[str] = None,
73
- response: Optional[str] = None,
74
- conversation=None,
76
+ query: str,
77
+ response: str,
78
+ ) -> Dict[str, Union[str, bool]]:
79
+ """Evaluate whether cross domain injected attacks are present in given query/response
80
+
81
+ :keyword query: The query to be evaluated.
82
+ :paramtype query: str
83
+ :keyword response: The response to be evaluated.
84
+ :paramtype response: str
85
+ :return: The cross domain injection attack score
86
+ :rtype: Dict[str, Union[str, bool]]
87
+ """
88
+
89
+ @overload
90
+ def __call__(
91
+ self,
92
+ *,
93
+ conversation: Conversation,
94
+ ) -> Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]:
95
+ """Evaluate whether cross domain injected attacks are present in a conversation
96
+
97
+ :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
98
+ key "messages", and potentially a global context under the key "context". Conversation turns are expected
99
+ to be dictionaries with keys "content", "role", and possibly "context".
100
+ :paramtype conversation: ~azure.ai.evaluation.Conversation
101
+ :return: The cross domain injection attack score
102
+ :rtype: Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]
103
+ """
104
+
105
+ @override
106
+ def __call__( # pylint: disable=docstring-missing-param
107
+ self,
108
+ *args,
75
109
  **kwargs,
76
110
  ):
77
111
  """
78
112
  Evaluate whether cross domain injected attacks are present in your AI system's response.
79
113
 
80
114
  :keyword query: The query to be evaluated.
81
- :paramtype query: str
115
+ :paramtype query: Optional[str]
82
116
  :keyword response: The response to be evaluated.
83
- :paramtype response: str
117
+ :paramtype response: Optional[str]
84
118
  :keyword conversation: The conversation to evaluate. Expected to contain a list of conversation turns under the
85
119
  key "messages". Conversation turns are expected
86
120
  to be dictionaries with keys "content" and "role".
87
121
  :paramtype conversation: Optional[~azure.ai.evaluation.Conversation]
88
- :return: The fluency score.
89
- :rtype: Union[Dict[str, Union[str, bool]], Dict[str, Union[str, bool, Dict[str, List[Union[str, bool]]]]]]
122
+ :return: The cross domain injection attack score
123
+ :rtype: Union[Dict[str, Union[str, bool]], Dict[str, Union[float, Dict[str, List[Union[str, bool]]]]]]
90
124
  """
91
- return super().__call__(query=query, response=response, conversation=conversation, **kwargs)
125
+ return super().__call__(*args, **kwargs)
@@ -23,6 +23,7 @@ class ErrorCategory(Enum):
23
23
  * SERVICE_UNAVAILABLE -> Service is unavailable
24
24
  * MISSING_PACKAGE -> Required package is missing
25
25
  * FAILED_REMOTE_TRACKING -> Remote tracking failed
26
+ * PROJECT_ACCESS_ERROR -> Access to project failed
26
27
  * UNKNOWN -> Undefined placeholder. Avoid using.
27
28
  """
28
29
 
@@ -35,6 +36,7 @@ class ErrorCategory(Enum):
35
36
  SERVICE_UNAVAILABLE = "SERVICE UNAVAILABLE"
36
37
  MISSING_PACKAGE = "MISSING PACKAGE"
37
38
  FAILED_REMOTE_TRACKING = "FAILED REMOTE TRACKING"
39
+ PROJECT_ACCESS_ERROR = "PROJECT ACCESS ERROR"
38
40
  UNKNOWN = "UNKNOWN"
39
41
 
40
42
 
@@ -448,19 +448,21 @@ class AsyncHttpPipeline(AsyncPipeline):
448
448
  return cast(Self, await super().__aenter__())
449
449
 
450
450
 
451
- def get_http_client() -> HttpPipeline:
451
+ def get_http_client(**kwargs: Any) -> HttpPipeline:
452
452
  """Get an HttpPipeline configured with common policies.
453
453
 
454
454
  :returns: An HttpPipeline with a set of applied policies:
455
455
  :rtype: HttpPipeline
456
456
  """
457
- return HttpPipeline(user_agent_policy=UserAgentPolicy(base_user_agent=USER_AGENT))
457
+ kwargs.setdefault("user_agent_policy", UserAgentPolicy(base_user_agent=USER_AGENT))
458
+ return HttpPipeline(**kwargs)
458
459
 
459
460
 
460
- def get_async_http_client() -> AsyncHttpPipeline:
461
+ def get_async_http_client(**kwargs: Any) -> AsyncHttpPipeline:
461
462
  """Get an AsyncHttpPipeline configured with common policies.
462
463
 
463
464
  :returns: An AsyncHttpPipeline with a set of applied policies:
464
465
  :rtype: AsyncHttpPipeline
465
466
  """
466
- return AsyncHttpPipeline(user_agent_policy=UserAgentPolicy(base_user_agent=USER_AGENT))
467
+ kwargs.setdefault("user_agent_policy", UserAgentPolicy(base_user_agent=USER_AGENT))
468
+ return AsyncHttpPipeline(**kwargs)
@@ -8,44 +8,95 @@ from typing_extensions import NotRequired
8
8
 
9
9
 
10
10
  class AzureOpenAIModelConfiguration(TypedDict):
11
- """Model Configuration for Azure OpenAI Model"""
11
+ """Model configuration for Azure OpenAI models
12
+
13
+ :param type: The type of the model configuration. Should be 'azure_openai' for AzureOpenAIModelConfiguration
14
+ :type type: NotRequired[Literal["azure_openai"]]
15
+ :param azure_deployment: Name of Azure OpenAI deployment to make requests to
16
+ :type azure_deployment: str
17
+ :param azure_endpoint: Endpoint of Azure OpenAI resource to make requests to
18
+ :type azure_endpoint: str
19
+ :param api_key: API key of Azure OpenAI resource. Optional.
20
+ :type api_key: NotRequired[str]
21
+ :param api_version: API version to use in request to Azure OpenAI deployment. Optional.
22
+ :type api_version: NotRequired[str]
23
+
24
+ .. admonition:: Example:
25
+
26
+ .. literalinclude:: ../samples/evaluation_samples_common.py
27
+ :start-after: [START create_AOAI_model_config]
28
+ :end-before: [END create_AOAI_model_config]
29
+ :language: python
30
+ :dedent: 8
31
+ :caption: Creating an AzureOpenAIModelConfiguration object.
32
+
33
+ """
12
34
 
13
35
  type: NotRequired[Literal["azure_openai"]]
14
- """The type of the model configuration. Should be 'azure_openai' for AzureOpenAIModelConfiguration"""
15
36
  azure_deployment: str
16
- """Name of Azure OpenAI deployment to make request to"""
17
37
  azure_endpoint: str
18
38
  """Endpoint of Azure OpenAI resource to make request to"""
19
39
  api_key: NotRequired[str]
20
40
  """API key of Azure OpenAI resource"""
21
41
  api_version: NotRequired[str]
22
- """(Optional) API version to use in request to Azure OpenAI deployment"""
23
42
 
24
43
 
25
44
  class OpenAIModelConfiguration(TypedDict):
26
- """Model Configuration for OpenAI Model"""
45
+ """Model configuration for OpenAI models
46
+
47
+ :param type: The type of the model configuration. Should be 'openai' for OpenAIModelConfiguration
48
+ :type type: NotRequired[Literal["openai"]]
49
+ :param api_key: API key needed to make requests to model
50
+ :type api_key: str
51
+ :param model: Name of model to be used in OpenAI request
52
+ :type model: str
53
+ :param base_url: Base URL to be used in OpenAI request. Optional.
54
+ :type base_url: NotRequired[str]
55
+ :param organization: OpenAI organization. Optional.
56
+ :type organization: NotRequired[str]
57
+
58
+ .. admonition:: Example:
59
+
60
+ .. literalinclude:: ../samples/evaluation_samples_common.py
61
+ :start-after: [START create_OAI_model_config]
62
+ :end-before: [END create_OAI_model_config]
63
+ :language: python
64
+ :dedent: 8
65
+ :caption: Creating an OpenAIModelConfiguration object.
66
+
67
+ """
27
68
 
28
69
  type: NotRequired[Literal["openai"]]
29
- """The type of the model configuration. Should be 'openai' for OpenAIModelConfiguration"""
30
70
  api_key: str
31
- "API key needed to make request to model"
32
71
  model: str
33
- """Name of model to be used in OpenAI request"""
34
72
  base_url: NotRequired[str]
35
- """(Optional) Base URL to be used in OpenAI request"""
36
73
  organization: NotRequired[str]
37
- """(Optional) OpenAI organization"""
38
74
 
39
75
 
40
76
  class AzureAIProject(TypedDict):
41
- """Azure AI Project Information"""
77
+ """Information about the Azure AI project
78
+
79
+ :param subscription_id: ID of the Azure subscription the project is in
80
+ :type subscription_id: str
81
+ :param resource_group_name: Name of the Azure resource group the project is in
82
+ :type resource_group_name: str
83
+ :param project_name: Name of the Azure project
84
+ :type project_name: str
85
+
86
+ .. admonition:: Example:
87
+
88
+ .. literalinclude:: ../samples/evaluation_samples_common.py
89
+ :start-after: [START create_azure_ai_project_object]
90
+ :end-before: [END create_azure_ai_project_object]
91
+ :language: python
92
+ :dedent: 8
93
+ :caption: Creating an AzureAIProject object.
94
+
95
+ """
42
96
 
43
97
  subscription_id: str
44
- """Azure subscription id of the project"""
45
98
  resource_group_name: str
46
- """Azure resource group name of the project"""
47
99
  project_name: str
48
- """Azure project name"""
49
100
 
50
101
 
51
102
  class EvaluatorConfig(TypedDict, total=False):
@@ -32,10 +32,6 @@ ROUGE-1.5.5.pl -m -e data -n 2 -a settings.xml
32
32
  In these examples settings.xml lists input files and formats.
33
33
  """
34
34
 
35
- from __future__ import absolute_import
36
- from __future__ import division
37
- from __future__ import print_function
38
-
39
35
  import collections
40
36
  import re
41
37
 
@@ -21,10 +21,6 @@ Aggregation functions use bootstrap resampling to compute confidence intervals
21
21
  as per the original ROUGE perl implementation.
22
22
  """
23
23
 
24
- from __future__ import absolute_import
25
- from __future__ import division
26
- from __future__ import print_function
27
-
28
24
  import abc
29
25
  import collections
30
26
  from typing import Dict
@@ -17,10 +17,6 @@
17
17
 
18
18
  """A library for tokenizing text."""
19
19
 
20
- from __future__ import absolute_import
21
- from __future__ import division
22
- from __future__ import print_function
23
-
24
20
  import re
25
21
 
26
22
 
@@ -2,4 +2,4 @@
2
2
  # Copyright (c) Microsoft Corporation. All rights reserved.
3
3
  # ---------------------------------------------------------
4
4
 
5
- VERSION = "1.0.0b5"
5
+ VERSION = "1.1.0"
@@ -3,10 +3,22 @@
3
3
  # ---------------------------------------------------------
4
4
 
5
5
  from enum import Enum
6
+ from azure.ai.evaluation._common._experimental import experimental
6
7
 
7
8
 
9
+ @experimental
8
10
  class AdversarialScenario(Enum):
9
- """Adversarial scenario types"""
11
+ """Adversarial scenario types
12
+
13
+ .. admonition:: Example:
14
+
15
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
16
+ :start-after: [START adversarial_scenario]
17
+ :end-before: [END adversarial_scenario]
18
+ :language: python
19
+ :dedent: 8
20
+ :caption: Configure an AdversarialSimulator with an Adversarial Conversation scenario.
21
+ """
10
22
 
11
23
  ADVERSARIAL_QA = "adv_qa"
12
24
  ADVERSARIAL_CONVERSATION = "adv_conversation"
@@ -18,15 +30,19 @@ class AdversarialScenario(Enum):
18
30
  ADVERSARIAL_CONTENT_PROTECTED_MATERIAL = "adv_content_protected_material"
19
31
 
20
32
 
33
+ @experimental
21
34
  class AdversarialScenarioJailbreak(Enum):
22
35
  """Adversarial scenario types for XPIA Jailbreak"""
23
36
 
24
37
  ADVERSARIAL_INDIRECT_JAILBREAK = "adv_xpia"
25
38
 
26
39
 
40
+ @experimental
27
41
  class _UnstableAdversarialScenario(Enum):
28
42
  """Adversarial scenario types that we haven't published, but still want available for internal use
29
43
  Values listed here are subject to potential change, and/or migration to the main enum over time.
30
44
  """
31
45
 
32
46
  ECI = "adv_politics"
47
+ ADVERSARIAL_IMAGE_GEN = "adv_image_gen"
48
+ ADVERSARIAL_IMAGE_MULTIMODAL = "adv_image_understanding"
@@ -6,7 +6,7 @@
6
6
  import asyncio
7
7
  import logging
8
8
  import random
9
- from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast
9
+ from typing import Any, Callable, Dict, List, Optional, Union, cast
10
10
 
11
11
  from tqdm import tqdm
12
12
 
@@ -14,13 +14,20 @@ from azure.ai.evaluation._common._experimental import experimental
14
14
  from azure.ai.evaluation._common.utils import validate_azure_ai_project
15
15
  from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
16
16
  from azure.ai.evaluation._http_utils import get_async_http_client
17
- from azure.ai.evaluation.simulator import AdversarialScenario
17
+ from azure.ai.evaluation._model_configurations import AzureAIProject
18
+ from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialScenarioJailbreak
18
19
  from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
19
20
  from azure.core.credentials import TokenCredential
20
21
  from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
21
22
 
22
23
  from ._constants import SupportedLanguages
23
- from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole, ConversationTurn
24
+ from ._conversation import (
25
+ CallbackConversationBot,
26
+ MultiModalConversationBot,
27
+ ConversationBot,
28
+ ConversationRole,
29
+ ConversationTurn,
30
+ )
24
31
  from ._conversation._conversation import simulate_conversation
25
32
  from ._model_tools import (
26
33
  AdversarialTemplateHandler,
@@ -45,9 +52,19 @@ class AdversarialSimulator:
45
52
  :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
46
53
  :param credential: The credential for connecting to Azure AI project.
47
54
  :type credential: ~azure.core.credentials.TokenCredential
55
+
56
+ .. admonition:: Example:
57
+
58
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
59
+ :start-after: [START adversarial_scenario]
60
+ :end-before: [END adversarial_scenario]
61
+ :language: python
62
+ :dedent: 8
63
+ :caption: Run the AdversarialSimulator with an AdversarialConversation scenario to produce 2 results with
64
+ 2 conversation turns each (4 messages per result).
48
65
  """
49
66
 
50
- def __init__(self, *, azure_ai_project: dict, credential):
67
+ def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
51
68
  """Constructor."""
52
69
 
53
70
  try:
@@ -149,28 +166,6 @@ class AdversarialSimulator:
149
166
 
150
167
  The 'content' for 'assistant' role messages may include the messages that your callback returned.
151
168
  :rtype: List[Dict[str, Any]]
152
-
153
- **Output format**
154
-
155
- .. code-block:: python
156
-
157
- return_value = [
158
- {
159
- 'template_parameters': {},
160
- 'messages': [
161
- {
162
- 'content': '<jailbreak prompt> <adversarial query>',
163
- 'role': 'user'
164
- },
165
- {
166
- 'content': "<response from endpoint>",
167
- 'role': 'assistant',
168
- 'context': None
169
- }
170
- ],
171
- '$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
172
- }
173
- ]
174
169
  """
175
170
 
176
171
  # validate the inputs
@@ -215,19 +210,19 @@ class AdversarialSimulator:
215
210
  ncols=100,
216
211
  unit="simulations",
217
212
  )
218
- for template in templates:
219
- parameter_order = list(range(len(template.template_parameters)))
220
- if randomize_order:
221
- # The template parameter lists are persistent across sim runs within a session,
222
- # So randomize a the selection instead of the parameter list directly,
223
- # or a potentially large deep copy.
224
- if randomization_seed is not None:
225
- random.seed(randomization_seed)
226
- random.shuffle(parameter_order)
227
- for index in parameter_order:
228
- parameter = template.template_parameters[index].copy()
213
+ if randomize_order:
214
+ # The template parameter lists are persistent across sim runs within a session,
215
+ # so randomize the selection instead of the parameter list directly,
216
+ # or a potentially large deep copy.
217
+ if randomization_seed is not None:
218
+ random.seed(randomization_seed)
219
+ random.shuffle(templates)
220
+ parameter_lists = [t.template_parameters for t in templates]
221
+ zipped_parameters = list(zip(*parameter_lists))
222
+ for param_group in zipped_parameters:
223
+ for template, parameter in zip(templates, param_group):
229
224
  if _jailbreak_type == "upia":
230
- parameter = self._join_conversation_starter(parameter, random.choice(jailbreak_dataset))
225
+ parameter = self._add_jailbreak_parameter(parameter, random.choice(jailbreak_dataset))
231
226
  tasks.append(
232
227
  asyncio.create_task(
233
228
  self._simulate_async(
@@ -240,6 +235,7 @@ class AdversarialSimulator:
240
235
  api_call_delay_sec=api_call_delay_sec,
241
236
  language=language,
242
237
  semaphore=semaphore,
238
+ scenario=scenario,
243
239
  )
244
240
  )
245
241
  )
@@ -301,10 +297,13 @@ class AdversarialSimulator:
301
297
  api_call_delay_sec: int,
302
298
  language: SupportedLanguages,
303
299
  semaphore: asyncio.Semaphore,
300
+ scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
304
301
  ) -> List[Dict]:
305
- user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
302
+ user_bot = self._setup_bot(
303
+ role=ConversationRole.USER, template=template, parameters=parameters, scenario=scenario
304
+ )
306
305
  system_bot = self._setup_bot(
307
- target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters
306
+ target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters, scenario=scenario
308
307
  )
309
308
  bots = [user_bot, system_bot]
310
309
  session = get_async_http_client().with_policies(
@@ -350,6 +349,7 @@ class AdversarialSimulator:
350
349
  template: AdversarialTemplate,
351
350
  parameters: TemplateParameters,
352
351
  target: Optional[Callable] = None,
352
+ scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
353
353
  ) -> ConversationBot:
354
354
  if role is ConversationRole.USER:
355
355
  model = self._get_user_proxy_completion_model(
@@ -381,6 +381,21 @@ class AdversarialSimulator:
381
381
  def __call__(self) -> None:
382
382
  pass
383
383
 
384
+ if scenario in [
385
+ _UnstableAdversarialScenario.ADVERSARIAL_IMAGE_GEN,
386
+ _UnstableAdversarialScenario.ADVERSARIAL_IMAGE_MULTIMODAL,
387
+ ]:
388
+ return MultiModalConversationBot(
389
+ callback=target,
390
+ role=role,
391
+ model=DummyModel(),
392
+ user_template=str(template),
393
+ user_template_parameters=parameters,
394
+ rai_client=self.rai_client,
395
+ conversation_template="",
396
+ instantiation_parameters={},
397
+ )
398
+
384
399
  return CallbackConversationBot(
385
400
  callback=target,
386
401
  role=role,
@@ -400,13 +415,8 @@ class AdversarialSimulator:
400
415
  blame=ErrorBlame.SYSTEM_ERROR,
401
416
  )
402
417
 
403
- def _join_conversation_starter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
404
- key: Literal["conversation_starter"] = "conversation_starter"
405
- if key in parameters.keys():
406
- parameters[key] = f"{to_join} {parameters[key]}"
407
- else:
408
- parameters[key] = to_join
409
-
418
+ def _add_jailbreak_parameter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
419
+ parameters["jailbreak_string"] = to_join
410
420
  return parameters
411
421
 
412
422
  def call_sync(
@@ -5,7 +5,17 @@ from enum import Enum
5
5
 
6
6
 
7
7
  class SupportedLanguages(Enum):
8
- """Supported languages for evaluation, using ISO standard language codes."""
8
+ """Supported languages for evaluation, using ISO standard language codes.
9
+
10
+ .. admonition:: Example:
11
+
12
+ .. literalinclude:: ../samples/evaluation_samples_simulate.py
13
+ :start-after: [START supported_languages]
14
+ :end-before: [END supported_languages]
15
+ :language: python
16
+ :dedent: 8
17
+ :caption: Run the AdversarialSimulator with Simplified Chinese language support for evaluation.
18
+ """
9
19
 
10
20
  Spanish = "es"
11
21
  Italian = "it"