azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
|
@@ -1,102 +1,82 @@
|
|
|
1
1
|
# ---------------------------------------------------------
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
|
+
# pylint: disable=C0301,C0114,R0913,R0903
|
|
4
5
|
# noqa: E501
|
|
5
|
-
import functools
|
|
6
6
|
import logging
|
|
7
7
|
from random import randint
|
|
8
|
-
from typing import Callable, Optional
|
|
9
|
-
|
|
10
|
-
from promptflow._sdk._telemetry import ActivityType, monitor_operation
|
|
8
|
+
from typing import Callable, Optional, cast, Union
|
|
11
9
|
|
|
10
|
+
from azure.ai.evaluation._constants import TokenScope
|
|
11
|
+
from azure.ai.evaluation._common._experimental import experimental
|
|
12
|
+
from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
|
|
12
13
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
13
|
-
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
14
14
|
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
15
|
-
from azure.
|
|
15
|
+
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
16
|
+
from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
|
|
17
|
+
from azure.core.credentials import TokenCredential
|
|
16
18
|
|
|
17
19
|
from ._adversarial_simulator import AdversarialSimulator
|
|
18
|
-
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient
|
|
20
|
+
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient
|
|
19
21
|
|
|
20
22
|
logger = logging.getLogger(__name__)
|
|
21
23
|
|
|
22
24
|
|
|
23
|
-
|
|
24
|
-
"""Decorator to monitor adversarial scenario.
|
|
25
|
-
|
|
26
|
-
:param func: The function to be decorated.
|
|
27
|
-
:type func: Callable
|
|
28
|
-
:return: The decorated function.
|
|
29
|
-
:rtype: Callable
|
|
30
|
-
"""
|
|
31
|
-
|
|
32
|
-
@functools.wraps(func)
|
|
33
|
-
def wrapper(*args, **kwargs):
|
|
34
|
-
scenario = str(kwargs.get("scenario", None))
|
|
35
|
-
max_conversation_turns = kwargs.get("max_conversation_turns", None)
|
|
36
|
-
max_simulation_results = kwargs.get("max_simulation_results", None)
|
|
37
|
-
decorated_func = monitor_operation(
|
|
38
|
-
activity_name="jailbreak.adversarial.simulator.call",
|
|
39
|
-
activity_type=ActivityType.PUBLICAPI,
|
|
40
|
-
custom_dimensions={
|
|
41
|
-
"scenario": scenario,
|
|
42
|
-
"max_conversation_turns": max_conversation_turns,
|
|
43
|
-
"max_simulation_results": max_simulation_results,
|
|
44
|
-
},
|
|
45
|
-
)(func)
|
|
46
|
-
|
|
47
|
-
return decorated_func(*args, **kwargs)
|
|
48
|
-
|
|
49
|
-
return wrapper
|
|
50
|
-
|
|
51
|
-
|
|
25
|
+
@experimental
|
|
52
26
|
class DirectAttackSimulator:
|
|
53
27
|
"""
|
|
54
28
|
Initialize a UPIA (user prompt injected attack) jailbreak adversarial simulator with a project scope.
|
|
55
29
|
This simulator converses with your AI system using prompts designed to interrupt normal functionality.
|
|
56
30
|
|
|
57
|
-
:param azure_ai_project: The
|
|
58
|
-
name.
|
|
59
|
-
:type azure_ai_project:
|
|
31
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
32
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
33
|
+
:type azure_ai_project: Union[str, AzureAIProject]
|
|
60
34
|
:param credential: The credential for connecting to Azure AI project.
|
|
61
35
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
36
|
+
|
|
37
|
+
.. admonition:: Example:
|
|
38
|
+
|
|
39
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
40
|
+
:start-after: [START direct_attack_simulator]
|
|
41
|
+
:end-before: [END direct_attack_simulator]
|
|
42
|
+
:language: python
|
|
43
|
+
:dedent: 8
|
|
44
|
+
:caption: Run the DirectAttackSimulator to produce 2 results with 3 conversation turns each (6 messages in each result).
|
|
62
45
|
"""
|
|
63
46
|
|
|
64
|
-
def __init__(self, *, azure_ai_project: AzureAIProject, credential
|
|
47
|
+
def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
|
|
65
48
|
"""Constructor."""
|
|
66
|
-
|
|
67
|
-
if
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
blame=ErrorBlame.USER_ERROR,
|
|
49
|
+
|
|
50
|
+
if is_onedp_project(azure_ai_project):
|
|
51
|
+
self.azure_ai_project = azure_ai_project
|
|
52
|
+
self.credential = cast(TokenCredential, credential)
|
|
53
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
54
|
+
token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
|
|
55
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
56
|
+
credential=self.credential,
|
|
75
57
|
)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
58
|
+
self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
|
|
59
|
+
else:
|
|
60
|
+
try:
|
|
61
|
+
self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
|
|
62
|
+
except EvaluationException as e:
|
|
63
|
+
raise EvaluationException(
|
|
64
|
+
message=e.message,
|
|
65
|
+
internal_message=e.internal_message,
|
|
66
|
+
target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
|
|
67
|
+
category=e.category,
|
|
68
|
+
blame=e.blame,
|
|
69
|
+
) from e
|
|
70
|
+
self.credential = cast(TokenCredential, credential)
|
|
71
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
72
|
+
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
73
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
74
|
+
credential=self.credential,
|
|
85
75
|
)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
elif "credential" in azure_ai_project:
|
|
89
|
-
credential = azure_ai_project["credential"]
|
|
90
|
-
self.credential = credential
|
|
91
|
-
self.azure_ai_project = azure_ai_project
|
|
92
|
-
self.token_manager = ManagedIdentityAPITokenManager(
|
|
93
|
-
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
94
|
-
logger=logging.getLogger("AdversarialSimulator"),
|
|
95
|
-
credential=credential,
|
|
96
|
-
)
|
|
97
|
-
self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
|
|
76
|
+
self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
|
|
77
|
+
|
|
98
78
|
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
99
|
-
azure_ai_project=azure_ai_project, rai_client=self.rai_client
|
|
79
|
+
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
100
80
|
)
|
|
101
81
|
|
|
102
82
|
def _ensure_service_dependencies(self):
|
|
@@ -110,7 +90,6 @@ class DirectAttackSimulator:
|
|
|
110
90
|
blame=ErrorBlame.USER_ERROR,
|
|
111
91
|
)
|
|
112
92
|
|
|
113
|
-
# @monitor_adversarial_scenario
|
|
114
93
|
async def __call__(
|
|
115
94
|
self,
|
|
116
95
|
*,
|
|
@@ -169,7 +148,7 @@ class DirectAttackSimulator:
|
|
|
169
148
|
- '**$schema**': A string indicating the schema URL for the conversation format.
|
|
170
149
|
|
|
171
150
|
The 'content' for 'assistant' role messages may includes the messages that your callback returned.
|
|
172
|
-
:rtype: Dict[str, [List[Dict[str, Any]]]]
|
|
151
|
+
:rtype: Dict[str, [List[Dict[str, Any]]]]
|
|
173
152
|
|
|
174
153
|
**Output format**
|
|
175
154
|
|
|
@@ -232,7 +211,7 @@ class DirectAttackSimulator:
|
|
|
232
211
|
api_call_retry_sleep_sec=api_call_retry_sleep_sec,
|
|
233
212
|
api_call_delay_sec=api_call_delay_sec,
|
|
234
213
|
concurrent_async_task=concurrent_async_task,
|
|
235
|
-
randomize_order=
|
|
214
|
+
randomize_order=False,
|
|
236
215
|
randomization_seed=randomization_seed,
|
|
237
216
|
)
|
|
238
217
|
jb_sim = AdversarialSimulator(azure_ai_project=self.azure_ai_project, credential=self.credential)
|
|
@@ -246,7 +225,7 @@ class DirectAttackSimulator:
|
|
|
246
225
|
api_call_delay_sec=api_call_delay_sec,
|
|
247
226
|
concurrent_async_task=concurrent_async_task,
|
|
248
227
|
_jailbreak_type="upia",
|
|
249
|
-
randomize_order=
|
|
228
|
+
randomize_order=False,
|
|
250
229
|
randomization_seed=randomization_seed,
|
|
251
230
|
)
|
|
252
231
|
return {"jailbreak": jb_sim_results, "regular": regular_sim_results}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
from ._experimental import experimental
|
|
2
1
|
from ._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
|
|
3
2
|
from ._simulator_data_classes import ConversationHistory, Turn
|
|
4
3
|
|
|
5
|
-
__all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING"
|
|
4
|
+
__all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING"]
|
|
@@ -14,4 +14,5 @@ SUPPORTED_LANGUAGES_MAPPING = {
|
|
|
14
14
|
SupportedLanguages.SimplifiedChinese: BASE_SUFFIX.replace("__language__", "simplified chinese"),
|
|
15
15
|
SupportedLanguages.Portuguese: BASE_SUFFIX.replace("__language__", "portuguese"),
|
|
16
16
|
SupportedLanguages.Japanese: BASE_SUFFIX.replace("__language__", "japanese"),
|
|
17
|
+
SupportedLanguages.Korean: BASE_SUFFIX.replace("__language__", "korean"),
|
|
17
18
|
}
|
|
@@ -18,7 +18,7 @@ class Turn:
|
|
|
18
18
|
|
|
19
19
|
role: Union[str, ConversationRole]
|
|
20
20
|
content: str
|
|
21
|
-
context: str = None
|
|
21
|
+
context: Optional[str] = None
|
|
22
22
|
|
|
23
23
|
def to_dict(self) -> Dict[str, Optional[str]]:
|
|
24
24
|
"""
|
|
@@ -30,7 +30,19 @@ class Turn:
|
|
|
30
30
|
return {
|
|
31
31
|
"role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
|
|
32
32
|
"content": self.content,
|
|
33
|
-
"context": self.context,
|
|
33
|
+
"context": str(self.context),
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
def to_context_free_dict(self) -> Dict[str, Optional[str]]:
|
|
37
|
+
"""
|
|
38
|
+
Convert the conversation turn to a dictionary without context.
|
|
39
|
+
|
|
40
|
+
:returns: A dictionary representation of the conversation turn without context.
|
|
41
|
+
:rtype: Dict[str, Optional[str]]
|
|
42
|
+
"""
|
|
43
|
+
return {
|
|
44
|
+
"role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
|
|
45
|
+
"content": self.content,
|
|
34
46
|
}
|
|
35
47
|
|
|
36
48
|
def __repr__(self):
|
|
@@ -42,13 +54,13 @@ class ConversationHistory:
|
|
|
42
54
|
Conversation history class to keep track of the conversation turns in a conversation.
|
|
43
55
|
"""
|
|
44
56
|
|
|
45
|
-
def __init__(self):
|
|
57
|
+
def __init__(self) -> None:
|
|
46
58
|
"""
|
|
47
59
|
Initializes the conversation history with an empty list of turns.
|
|
48
60
|
"""
|
|
49
61
|
self.history: List[Turn] = []
|
|
50
62
|
|
|
51
|
-
def add_to_history(self, turn: Turn):
|
|
63
|
+
def add_to_history(self, turn: Turn) -> None:
|
|
52
64
|
"""
|
|
53
65
|
Adds a turn to the conversation history.
|
|
54
66
|
|
|
@@ -57,7 +69,7 @@ class ConversationHistory:
|
|
|
57
69
|
"""
|
|
58
70
|
self.history.append(turn)
|
|
59
71
|
|
|
60
|
-
def to_list(self) -> List[Dict[str, str]]:
|
|
72
|
+
def to_list(self) -> List[Dict[str, Optional[str]]]:
|
|
61
73
|
"""
|
|
62
74
|
Converts the conversation history to a list of dictionaries.
|
|
63
75
|
|
|
@@ -66,6 +78,15 @@ class ConversationHistory:
|
|
|
66
78
|
"""
|
|
67
79
|
return [turn.to_dict() for turn in self.history]
|
|
68
80
|
|
|
81
|
+
def to_context_free_list(self) -> List[Dict[str, Optional[str]]]:
|
|
82
|
+
"""
|
|
83
|
+
Converts the conversation history to a list of dictionaries without context.
|
|
84
|
+
|
|
85
|
+
:returns: A list of dictionaries representing the conversation turns without context.
|
|
86
|
+
:rtype: List[Dict[str, Optional[str]]]
|
|
87
|
+
"""
|
|
88
|
+
return [turn.to_context_free_dict() for turn in self.history]
|
|
89
|
+
|
|
69
90
|
def __len__(self) -> int:
|
|
70
91
|
return len(self.history)
|
|
71
92
|
|
|
@@ -1,100 +1,90 @@
|
|
|
1
1
|
# ---------------------------------------------------------
|
|
2
2
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
|
+
# pylint: disable=C0301,C0114,R0913,R0903
|
|
4
5
|
# noqa: E501
|
|
5
|
-
import
|
|
6
|
+
import asyncio
|
|
6
7
|
import logging
|
|
7
|
-
|
|
8
|
+
import random
|
|
9
|
+
from typing import Callable, cast, Union, Optional
|
|
8
10
|
|
|
9
|
-
from
|
|
11
|
+
from tqdm import tqdm
|
|
10
12
|
|
|
13
|
+
from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
|
|
14
|
+
from azure.ai.evaluation._common._experimental import experimental
|
|
11
15
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
16
|
+
from azure.ai.evaluation.simulator import AdversarialScenarioJailbreak, SupportedLanguages
|
|
12
17
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
13
|
-
from azure.ai.evaluation.
|
|
14
|
-
from azure.
|
|
18
|
+
from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
|
|
19
|
+
from azure.core.credentials import TokenCredential
|
|
20
|
+
from azure.ai.evaluation._constants import TokenScope
|
|
15
21
|
|
|
16
|
-
from ._adversarial_simulator import AdversarialSimulator
|
|
17
|
-
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
|
|
22
|
+
from ._adversarial_simulator import AdversarialSimulator, JsonLineList
|
|
18
23
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def monitor_adversarial_scenario(func) -> Callable:
|
|
23
|
-
"""Decorator to monitor adversarial scenario.
|
|
24
|
-
|
|
25
|
-
:param func: The function to be decorated.
|
|
26
|
-
:type func: Callable
|
|
27
|
-
:return: The decorated function.
|
|
28
|
-
:rtype: Callable
|
|
29
|
-
"""
|
|
24
|
+
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient
|
|
30
25
|
|
|
31
|
-
|
|
32
|
-
def wrapper(*args, **kwargs):
|
|
33
|
-
scenario = str(kwargs.get("scenario", None))
|
|
34
|
-
max_conversation_turns = kwargs.get("max_conversation_turns", None)
|
|
35
|
-
max_simulation_results = kwargs.get("max_simulation_results", None)
|
|
36
|
-
decorated_func = monitor_operation(
|
|
37
|
-
activity_name="xpia.adversarial.simulator.call",
|
|
38
|
-
activity_type=ActivityType.PUBLICAPI,
|
|
39
|
-
custom_dimensions={
|
|
40
|
-
"scenario": scenario,
|
|
41
|
-
"max_conversation_turns": max_conversation_turns,
|
|
42
|
-
"max_simulation_results": max_simulation_results,
|
|
43
|
-
},
|
|
44
|
-
)(func)
|
|
45
|
-
|
|
46
|
-
return decorated_func(*args, **kwargs)
|
|
47
|
-
|
|
48
|
-
return wrapper
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
49
27
|
|
|
50
28
|
|
|
51
|
-
|
|
29
|
+
@experimental
|
|
30
|
+
class IndirectAttackSimulator(AdversarialSimulator):
|
|
52
31
|
"""
|
|
53
32
|
Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope.
|
|
54
33
|
|
|
55
|
-
:param azure_ai_project: The
|
|
56
|
-
name.
|
|
57
|
-
:type azure_ai_project:
|
|
34
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
35
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
36
|
+
:type azure_ai_project: Union[str, AzureAIProject]
|
|
58
37
|
:param credential: The credential for connecting to Azure AI project.
|
|
59
38
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
39
|
+
|
|
40
|
+
.. admonition:: Example:
|
|
41
|
+
|
|
42
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
43
|
+
:start-after: [START indirect_attack_simulator]
|
|
44
|
+
:end-before: [END indirect_attack_simulator]
|
|
45
|
+
:language: python
|
|
46
|
+
:dedent: 8
|
|
47
|
+
:caption: Run the IndirectAttackSimulator to produce 1 result with 1 conversation turn (2 messages in the result).
|
|
60
48
|
"""
|
|
61
49
|
|
|
62
|
-
def __init__(self, *, azure_ai_project: AzureAIProject, credential
|
|
50
|
+
def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
|
|
63
51
|
"""Constructor."""
|
|
64
|
-
|
|
65
|
-
if
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
blame=ErrorBlame.USER_ERROR,
|
|
52
|
+
|
|
53
|
+
if is_onedp_project(azure_ai_project):
|
|
54
|
+
self.azure_ai_project = azure_ai_project
|
|
55
|
+
self.credential = cast(TokenCredential, credential)
|
|
56
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
57
|
+
token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
|
|
58
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
59
|
+
credential=self.credential,
|
|
73
60
|
)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
message=msg,
|
|
78
|
-
internal_message=msg,
|
|
79
|
-
target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
|
|
80
|
-
category=ErrorCategory.MISSING_FIELD,
|
|
81
|
-
blame=ErrorBlame.USER_ERROR,
|
|
61
|
+
self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
|
|
62
|
+
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
63
|
+
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
82
64
|
)
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
65
|
+
else:
|
|
66
|
+
try:
|
|
67
|
+
self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
|
|
68
|
+
except EvaluationException as e:
|
|
69
|
+
raise EvaluationException(
|
|
70
|
+
message=e.message,
|
|
71
|
+
internal_message=e.internal_message,
|
|
72
|
+
target=ErrorTarget.DIRECT_ATTACK_SIMULATOR,
|
|
73
|
+
category=e.category,
|
|
74
|
+
blame=e.blame,
|
|
75
|
+
) from e
|
|
76
|
+
|
|
77
|
+
self.credential = cast(TokenCredential, credential)
|
|
78
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
79
|
+
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
80
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
81
|
+
credential=self.credential,
|
|
82
|
+
)
|
|
83
|
+
self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
|
|
84
|
+
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
85
|
+
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
86
|
+
)
|
|
87
|
+
super().__init__(azure_ai_project=azure_ai_project, credential=credential)
|
|
98
88
|
|
|
99
89
|
def _ensure_service_dependencies(self):
|
|
100
90
|
if self.rai_client is None:
|
|
@@ -107,33 +97,26 @@ class IndirectAttackSimulator:
|
|
|
107
97
|
blame=ErrorBlame.USER_ERROR,
|
|
108
98
|
)
|
|
109
99
|
|
|
110
|
-
# @monitor_adversarial_scenario
|
|
111
100
|
async def __call__(
|
|
112
101
|
self,
|
|
113
102
|
*,
|
|
114
|
-
scenario: AdversarialScenario,
|
|
115
103
|
target: Callable,
|
|
116
|
-
max_conversation_turns: int = 1,
|
|
117
104
|
max_simulation_results: int = 3,
|
|
118
105
|
api_call_retry_limit: int = 3,
|
|
119
106
|
api_call_retry_sleep_sec: int = 1,
|
|
120
107
|
api_call_delay_sec: int = 0,
|
|
121
108
|
concurrent_async_task: int = 3,
|
|
109
|
+
randomization_seed: Optional[int] = None,
|
|
110
|
+
**kwargs,
|
|
122
111
|
):
|
|
123
112
|
"""
|
|
124
113
|
Initializes the XPIA (cross domain prompt injected attack) jailbreak adversarial simulator with a project scope.
|
|
125
114
|
This simulator converses with your AI system using prompts injected into the context to interrupt normal
|
|
126
115
|
expected functionality by eliciting manipulated content, intrusion and attempting to gather information outside
|
|
127
116
|
the scope of your AI system.
|
|
128
|
-
|
|
129
|
-
:keyword scenario: Enum value specifying the adversarial scenario used for generating inputs.
|
|
130
|
-
:paramtype scenario: azure.ai.evaluation.simulator.AdversarialScenario
|
|
131
117
|
:keyword target: The target function to simulate adversarial inputs against.
|
|
132
118
|
This function should be asynchronous and accept a dictionary representing the adversarial input.
|
|
133
119
|
:paramtype target: Callable
|
|
134
|
-
:keyword max_conversation_turns: The maximum number of conversation turns to simulate.
|
|
135
|
-
Defaults to 1.
|
|
136
|
-
:paramtype max_conversation_turns: int
|
|
137
120
|
:keyword max_simulation_results: The maximum number of simulation results to return.
|
|
138
121
|
Defaults to 3.
|
|
139
122
|
:paramtype max_simulation_results: int
|
|
@@ -149,6 +132,9 @@ class IndirectAttackSimulator:
|
|
|
149
132
|
:keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
|
|
150
133
|
Defaults to 3.
|
|
151
134
|
:paramtype concurrent_async_task: int
|
|
135
|
+
:keyword randomization_seed: The seed used to shuffle the order of the prompt templates. If None, this
|
|
136
|
+
method does not shuffle the templates. Defaults to None.
|
|
137
|
+
:paramtype randomization_seed: Optional[int]
|
|
152
138
|
:return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:
|
|
153
139
|
|
|
154
140
|
- 'template_parameters': A dictionary with parameters used in the conversation template,
|
|
@@ -170,11 +156,11 @@ class IndirectAttackSimulator:
|
|
|
170
156
|
'template_parameters': {},
|
|
171
157
|
'messages': [
|
|
172
158
|
{
|
|
173
|
-
'content': '<
|
|
159
|
+
'content': '<adversarial query>',
|
|
174
160
|
'role': 'user'
|
|
175
161
|
},
|
|
176
162
|
{
|
|
177
|
-
'content': "<response from
|
|
163
|
+
'content': "<response from your callback>",
|
|
178
164
|
'role': 'assistant',
|
|
179
165
|
'context': None
|
|
180
166
|
}
|
|
@@ -183,25 +169,80 @@ class IndirectAttackSimulator:
|
|
|
183
169
|
}]
|
|
184
170
|
}
|
|
185
171
|
"""
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
172
|
+
# values that cannot be changed:
|
|
173
|
+
scenario = AdversarialScenarioJailbreak.ADVERSARIAL_INDIRECT_JAILBREAK
|
|
174
|
+
max_conversation_turns = 2
|
|
175
|
+
language = SupportedLanguages.English
|
|
176
|
+
self._ensure_service_dependencies()
|
|
177
|
+
templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
|
|
178
|
+
concurrent_async_task = min(concurrent_async_task, 1000)
|
|
179
|
+
semaphore = asyncio.Semaphore(concurrent_async_task)
|
|
180
|
+
sim_results = []
|
|
181
|
+
tasks = []
|
|
182
|
+
total_tasks = sum(len(t.template_parameters) for t in templates)
|
|
183
|
+
if max_simulation_results > total_tasks:
|
|
184
|
+
logger.warning(
|
|
185
|
+
"Cannot provide %s results due to maximum number of adversarial simulations that can be generated: %s."
|
|
186
|
+
"\n %s simulations will be generated.",
|
|
187
|
+
max_simulation_results,
|
|
188
|
+
total_tasks,
|
|
189
|
+
total_tasks,
|
|
194
190
|
)
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
api_call_retry_limit=api_call_retry_limit,
|
|
202
|
-
api_call_retry_sleep_sec=api_call_retry_sleep_sec,
|
|
203
|
-
api_call_delay_sec=api_call_delay_sec,
|
|
204
|
-
concurrent_async_task=concurrent_async_task,
|
|
205
|
-
_jailbreak_type="xpia",
|
|
191
|
+
total_tasks = min(total_tasks, max_simulation_results)
|
|
192
|
+
progress_bar = tqdm(
|
|
193
|
+
total=total_tasks,
|
|
194
|
+
desc="generating jailbreak simulations",
|
|
195
|
+
ncols=100,
|
|
196
|
+
unit="simulations",
|
|
206
197
|
)
|
|
207
|
-
|
|
198
|
+
|
|
199
|
+
# Apply randomization to templates if seed is provided
|
|
200
|
+
if randomization_seed is not None:
|
|
201
|
+
# Create a local random instance to avoid polluting global state
|
|
202
|
+
local_random = random.Random(randomization_seed)
|
|
203
|
+
local_random.shuffle(templates)
|
|
204
|
+
|
|
205
|
+
for template in templates:
|
|
206
|
+
for parameter in template.template_parameters:
|
|
207
|
+
tasks.append(
|
|
208
|
+
asyncio.create_task(
|
|
209
|
+
self._simulate_async(
|
|
210
|
+
target=target,
|
|
211
|
+
template=template,
|
|
212
|
+
parameters=parameter,
|
|
213
|
+
max_conversation_turns=max_conversation_turns,
|
|
214
|
+
api_call_retry_limit=api_call_retry_limit,
|
|
215
|
+
api_call_retry_sleep_sec=api_call_retry_sleep_sec,
|
|
216
|
+
api_call_delay_sec=api_call_delay_sec,
|
|
217
|
+
language=language,
|
|
218
|
+
semaphore=semaphore,
|
|
219
|
+
scenario=scenario,
|
|
220
|
+
)
|
|
221
|
+
)
|
|
222
|
+
)
|
|
223
|
+
if len(tasks) >= max_simulation_results:
|
|
224
|
+
break
|
|
225
|
+
if len(tasks) >= max_simulation_results:
|
|
226
|
+
break
|
|
227
|
+
for task in asyncio.as_completed(tasks):
|
|
228
|
+
completed_task = await task # type: ignore
|
|
229
|
+
template_parameters = completed_task.get("template_parameters", {}) # type: ignore
|
|
230
|
+
xpia_attack_type = template_parameters.get("xpia_attack_type", "") # type: ignore
|
|
231
|
+
action = template_parameters.get("action", "") # type: ignore
|
|
232
|
+
document_type = template_parameters.get("document_type", "") # type: ignore
|
|
233
|
+
sim_results.append(
|
|
234
|
+
{
|
|
235
|
+
"messages": completed_task["messages"], # type: ignore
|
|
236
|
+
"$schema": "http://azureml/sdk-2-0/ChatConversation.json",
|
|
237
|
+
"template_parameters": {
|
|
238
|
+
"metadata": {
|
|
239
|
+
"xpia_attack_type": xpia_attack_type,
|
|
240
|
+
"action": action,
|
|
241
|
+
"document_type": document_type,
|
|
242
|
+
},
|
|
243
|
+
},
|
|
244
|
+
}
|
|
245
|
+
)
|
|
246
|
+
progress_bar.update(1)
|
|
247
|
+
progress_bar.close()
|
|
248
|
+
return JsonLineList(sim_results)
|
|
@@ -4,11 +4,12 @@
|
|
|
4
4
|
|
|
5
5
|
"""Tooling for model evaluation"""
|
|
6
6
|
|
|
7
|
-
from ._identity_manager import ManagedIdentityAPITokenManager, PlainTokenManager
|
|
7
|
+
from ._identity_manager import ManagedIdentityAPITokenManager, PlainTokenManager
|
|
8
8
|
from ._proxy_completion_model import ProxyChatCompletionsModel
|
|
9
9
|
from ._rai_client import RAIClient
|
|
10
10
|
from ._template_handler import CONTENT_HARM_TEMPLATES_COLLECTION_KEY, AdversarialTemplateHandler
|
|
11
11
|
from .models import LLMBase, OpenAIChatCompletionsModel
|
|
12
|
+
from ..._constants import TokenScope
|
|
12
13
|
|
|
13
14
|
__all__ = [
|
|
14
15
|
"ManagedIdentityAPITokenManager",
|