azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
|
@@ -6,79 +6,108 @@
|
|
|
6
6
|
import asyncio
|
|
7
7
|
import logging
|
|
8
8
|
import random
|
|
9
|
-
from typing import Any, Callable, Dict, List, Optional
|
|
9
|
+
from typing import Any, Callable, Dict, List, Optional, Union, cast
|
|
10
|
+
import uuid
|
|
11
|
+
import warnings
|
|
10
12
|
|
|
11
13
|
from tqdm import tqdm
|
|
12
14
|
|
|
15
|
+
from azure.ai.evaluation._common._experimental import experimental
|
|
16
|
+
from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
|
|
17
|
+
from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
|
|
13
18
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
14
19
|
from azure.ai.evaluation._http_utils import get_async_http_client
|
|
15
20
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
16
|
-
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
21
|
+
from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialScenarioJailbreak
|
|
17
22
|
from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
|
|
23
|
+
from azure.ai.evaluation._constants import TokenScope
|
|
24
|
+
from azure.core.credentials import TokenCredential
|
|
18
25
|
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
19
|
-
from azure.identity import DefaultAzureCredential
|
|
20
26
|
|
|
21
27
|
from ._constants import SupportedLanguages
|
|
22
|
-
from ._conversation import
|
|
28
|
+
from ._conversation import (
|
|
29
|
+
CallbackConversationBot,
|
|
30
|
+
MultiModalConversationBot,
|
|
31
|
+
ConversationBot,
|
|
32
|
+
ConversationRole,
|
|
33
|
+
ConversationTurn,
|
|
34
|
+
)
|
|
23
35
|
from ._conversation._conversation import simulate_conversation
|
|
24
36
|
from ._model_tools import (
|
|
25
37
|
AdversarialTemplateHandler,
|
|
26
38
|
ManagedIdentityAPITokenManager,
|
|
27
39
|
ProxyChatCompletionsModel,
|
|
28
40
|
RAIClient,
|
|
29
|
-
TokenScope,
|
|
30
41
|
)
|
|
42
|
+
from ._model_tools._template_handler import AdversarialTemplate, TemplateParameters
|
|
31
43
|
from ._utils import JsonLineList
|
|
32
44
|
|
|
33
45
|
logger = logging.getLogger(__name__)
|
|
34
46
|
|
|
35
47
|
|
|
48
|
+
@experimental
|
|
36
49
|
class AdversarialSimulator:
|
|
37
50
|
"""
|
|
38
51
|
Initializes the adversarial simulator with a project scope.
|
|
39
52
|
|
|
40
|
-
:param azure_ai_project: The
|
|
41
|
-
name.
|
|
42
|
-
:type azure_ai_project:
|
|
53
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
54
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
55
|
+
:type azure_ai_project: Union[str, AzureAIProject]
|
|
43
56
|
:param credential: The credential for connecting to Azure AI project.
|
|
44
57
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
58
|
+
|
|
59
|
+
.. admonition:: Example:
|
|
60
|
+
|
|
61
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
62
|
+
:start-after: [START adversarial_scenario]
|
|
63
|
+
:end-before: [END adversarial_scenario]
|
|
64
|
+
:language: python
|
|
65
|
+
:dedent: 8
|
|
66
|
+
:caption: Run the AdversarialSimulator with an AdversarialConversation scenario to produce 2 results with
|
|
67
|
+
2 conversation turns each (4 messages per result).
|
|
45
68
|
"""
|
|
46
69
|
|
|
47
|
-
def __init__(self, *, azure_ai_project: AzureAIProject, credential
|
|
70
|
+
def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
|
|
48
71
|
"""Constructor."""
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
72
|
+
warnings.warn(
|
|
73
|
+
"DEPRECATION NOTE: Azure AI Evaluation SDK has discontinued active development on the AdversarialSimulator class."
|
|
74
|
+
+ " While existing functionality remains available in preview, it is no longer recommended for production workloads or future integration. "
|
|
75
|
+
+ "We recommend users migrate to the AI Red Teaming Agent for future use as it supports full parity of functionality."
|
|
76
|
+
+ " See https://aka.ms/airedteamingagent-sample for details on AI Red Teaming Agent.",
|
|
77
|
+
DeprecationWarning,
|
|
78
|
+
stacklevel=2,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
if is_onedp_project(azure_ai_project):
|
|
82
|
+
self.azure_ai_project = azure_ai_project
|
|
83
|
+
self.credential = cast(TokenCredential, credential)
|
|
84
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
85
|
+
token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
|
|
86
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
87
|
+
credential=self.credential,
|
|
58
88
|
)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
89
|
+
self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
|
|
90
|
+
else:
|
|
91
|
+
try:
|
|
92
|
+
self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
|
|
93
|
+
except EvaluationException as e:
|
|
94
|
+
raise EvaluationException(
|
|
95
|
+
message=e.message,
|
|
96
|
+
internal_message=e.internal_message,
|
|
97
|
+
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
98
|
+
category=e.category,
|
|
99
|
+
blame=e.blame,
|
|
100
|
+
) from e
|
|
101
|
+
self.credential = cast(TokenCredential, credential)
|
|
102
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
103
|
+
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
104
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
105
|
+
credential=self.credential,
|
|
68
106
|
)
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
elif "credential" in azure_ai_project:
|
|
72
|
-
credential = azure_ai_project["credential"]
|
|
73
|
-
self.azure_ai_project = azure_ai_project
|
|
74
|
-
self.token_manager = ManagedIdentityAPITokenManager(
|
|
75
|
-
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
76
|
-
logger=logging.getLogger("AdversarialSimulator"),
|
|
77
|
-
credential=credential,
|
|
78
|
-
)
|
|
79
|
-
self.rai_client = RAIClient(azure_ai_project=azure_ai_project, token_manager=self.token_manager)
|
|
107
|
+
self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
|
|
108
|
+
|
|
80
109
|
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
81
|
-
azure_ai_project=azure_ai_project, rai_client=self.rai_client
|
|
110
|
+
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
82
111
|
)
|
|
83
112
|
|
|
84
113
|
def _ensure_service_dependencies(self):
|
|
@@ -92,7 +121,7 @@ class AdversarialSimulator:
|
|
|
92
121
|
blame=ErrorBlame.USER_ERROR,
|
|
93
122
|
)
|
|
94
123
|
|
|
95
|
-
#
|
|
124
|
+
# pylint: disable=too-many-locals
|
|
96
125
|
async def __call__(
|
|
97
126
|
self,
|
|
98
127
|
*,
|
|
@@ -106,10 +135,10 @@ class AdversarialSimulator:
|
|
|
106
135
|
api_call_retry_sleep_sec: int = 1,
|
|
107
136
|
api_call_delay_sec: int = 0,
|
|
108
137
|
concurrent_async_task: int = 3,
|
|
109
|
-
_jailbreak_type: Optional[str] = None,
|
|
110
138
|
language: SupportedLanguages = SupportedLanguages.English,
|
|
111
139
|
randomize_order: bool = True,
|
|
112
140
|
randomization_seed: Optional[int] = None,
|
|
141
|
+
**kwargs,
|
|
113
142
|
):
|
|
114
143
|
"""
|
|
115
144
|
Executes the adversarial simulation against a specified target function asynchronously.
|
|
@@ -159,28 +188,6 @@ class AdversarialSimulator:
|
|
|
159
188
|
|
|
160
189
|
The 'content' for 'assistant' role messages may include the messages that your callback returned.
|
|
161
190
|
:rtype: List[Dict[str, Any]]
|
|
162
|
-
|
|
163
|
-
**Output format**
|
|
164
|
-
|
|
165
|
-
.. code-block:: python
|
|
166
|
-
|
|
167
|
-
return_value = [
|
|
168
|
-
{
|
|
169
|
-
'template_parameters': {},
|
|
170
|
-
'messages': [
|
|
171
|
-
{
|
|
172
|
-
'content': '<jailbreak prompt> <adversarial query>',
|
|
173
|
-
'role': 'user'
|
|
174
|
-
},
|
|
175
|
-
{
|
|
176
|
-
'content': "<response from endpoint>",
|
|
177
|
-
'role': 'assistant',
|
|
178
|
-
'context': None
|
|
179
|
-
}
|
|
180
|
-
],
|
|
181
|
-
'$schema': 'http://azureml/sdk-2-0/ChatConversation.json'
|
|
182
|
-
}
|
|
183
|
-
]
|
|
184
191
|
"""
|
|
185
192
|
|
|
186
193
|
# validate the inputs
|
|
@@ -202,6 +209,14 @@ class AdversarialSimulator:
|
|
|
202
209
|
)
|
|
203
210
|
self._ensure_service_dependencies()
|
|
204
211
|
templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
|
|
212
|
+
if len(templates) == 0:
|
|
213
|
+
raise EvaluationException(
|
|
214
|
+
message="Templates not found. Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
|
|
215
|
+
internal_message="Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
|
|
216
|
+
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
217
|
+
)
|
|
218
|
+
simulation_id = str(uuid.uuid4())
|
|
219
|
+
logger.warning("Use simulation_id to help debug the issue: %s", str(simulation_id))
|
|
205
220
|
concurrent_async_task = min(concurrent_async_task, 1000)
|
|
206
221
|
semaphore = asyncio.Semaphore(concurrent_async_task)
|
|
207
222
|
sim_results = []
|
|
@@ -216,46 +231,85 @@ class AdversarialSimulator:
|
|
|
216
231
|
total_tasks,
|
|
217
232
|
)
|
|
218
233
|
total_tasks = min(total_tasks, max_simulation_results)
|
|
234
|
+
_jailbreak_type = kwargs.get("_jailbreak_type", None)
|
|
219
235
|
if _jailbreak_type:
|
|
220
|
-
|
|
236
|
+
if isinstance(self.rai_client, RAIClient):
|
|
237
|
+
jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
|
|
238
|
+
elif isinstance(self.rai_client, AIProjectClient):
|
|
239
|
+
jailbreak_dataset = self.rai_client.red_teams.get_jail_break_dataset_with_type(type=_jailbreak_type)
|
|
221
240
|
progress_bar = tqdm(
|
|
222
241
|
total=total_tasks,
|
|
223
242
|
desc="generating jailbreak simulations" if _jailbreak_type else "generating simulations",
|
|
224
243
|
ncols=100,
|
|
225
244
|
unit="simulations",
|
|
226
245
|
)
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
#
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
246
|
+
if randomize_order:
|
|
247
|
+
# The template parameter lists are persistent across sim runs within a session,
|
|
248
|
+
# So randomize the selection instead of the parameter list directly,
|
|
249
|
+
# or a potentially large deep copy.
|
|
250
|
+
if randomization_seed is not None:
|
|
251
|
+
# Create a local random instance to avoid polluting global state
|
|
252
|
+
local_random = random.Random(randomization_seed)
|
|
253
|
+
local_random.shuffle(templates)
|
|
254
|
+
else:
|
|
255
|
+
random.shuffle(templates)
|
|
256
|
+
|
|
257
|
+
# Prepare task parameters based on scenario - but use a single append call for all scenarios
|
|
258
|
+
tasks = []
|
|
259
|
+
template_parameter_pairs = []
|
|
260
|
+
|
|
261
|
+
if scenario == AdversarialScenario.ADVERSARIAL_CONVERSATION:
|
|
262
|
+
# For ADVERSARIAL_CONVERSATION, flatten the parameters
|
|
263
|
+
for i, template in enumerate(templates):
|
|
264
|
+
if not template.template_parameters:
|
|
265
|
+
continue
|
|
266
|
+
for parameter in template.template_parameters:
|
|
267
|
+
template_parameter_pairs.append((template, parameter))
|
|
268
|
+
else:
|
|
269
|
+
# Use original logic for other scenarios - zip parameters
|
|
270
|
+
parameter_lists = [t.template_parameters for t in templates]
|
|
271
|
+
zipped_parameters = list(zip(*parameter_lists))
|
|
272
|
+
|
|
273
|
+
for param_group in zipped_parameters:
|
|
274
|
+
for template, parameter in zip(templates, param_group):
|
|
275
|
+
template_parameter_pairs.append((template, parameter))
|
|
276
|
+
|
|
277
|
+
# Limit to max_simulation_results if needed
|
|
278
|
+
if len(template_parameter_pairs) > max_simulation_results:
|
|
279
|
+
template_parameter_pairs = template_parameter_pairs[
|
|
280
|
+
:max_simulation_results
|
|
281
|
+
] # Create a seeded random instance for jailbreak selection if randomization_seed is provided
|
|
282
|
+
jailbreak_random = None
|
|
283
|
+
if _jailbreak_type == "upia" and randomization_seed is not None:
|
|
284
|
+
jailbreak_random = random.Random(randomization_seed)
|
|
285
|
+
|
|
286
|
+
# Single task append loop for all scenarios
|
|
287
|
+
for template, parameter in template_parameter_pairs:
|
|
288
|
+
if _jailbreak_type == "upia":
|
|
289
|
+
if jailbreak_random is not None:
|
|
290
|
+
selected_jailbreak = jailbreak_random.choice(jailbreak_dataset)
|
|
291
|
+
else:
|
|
292
|
+
selected_jailbreak = random.choice(jailbreak_dataset)
|
|
293
|
+
parameter = self._add_jailbreak_parameter(parameter, selected_jailbreak)
|
|
294
|
+
|
|
295
|
+
tasks.append(
|
|
296
|
+
asyncio.create_task(
|
|
297
|
+
self._simulate_async(
|
|
298
|
+
target=target,
|
|
299
|
+
template=template,
|
|
300
|
+
parameters=parameter,
|
|
301
|
+
max_conversation_turns=max_conversation_turns,
|
|
302
|
+
api_call_retry_limit=api_call_retry_limit,
|
|
303
|
+
api_call_retry_sleep_sec=api_call_retry_sleep_sec,
|
|
304
|
+
api_call_delay_sec=api_call_delay_sec,
|
|
305
|
+
language=language,
|
|
306
|
+
semaphore=semaphore,
|
|
307
|
+
scenario=scenario,
|
|
308
|
+
simulation_id=simulation_id,
|
|
253
309
|
)
|
|
254
310
|
)
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
if len(tasks) >= max_simulation_results:
|
|
258
|
-
break
|
|
311
|
+
)
|
|
312
|
+
|
|
259
313
|
for task in asyncio.as_completed(tasks):
|
|
260
314
|
sim_results.append(await task)
|
|
261
315
|
progress_bar.update(1)
|
|
@@ -263,16 +317,21 @@ class AdversarialSimulator:
|
|
|
263
317
|
|
|
264
318
|
return JsonLineList(sim_results)
|
|
265
319
|
|
|
266
|
-
def _to_chat_protocol(
|
|
320
|
+
def _to_chat_protocol(
|
|
321
|
+
self,
|
|
322
|
+
*,
|
|
323
|
+
conversation_history: List[ConversationTurn],
|
|
324
|
+
template_parameters: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
|
|
325
|
+
):
|
|
267
326
|
if template_parameters is None:
|
|
268
327
|
template_parameters = {}
|
|
269
328
|
messages = []
|
|
270
329
|
for _, m in enumerate(conversation_history):
|
|
271
330
|
message = {"content": m.message, "role": m.role.value}
|
|
272
|
-
if "context" in m.full_response:
|
|
331
|
+
if m.full_response is not None and "context" in m.full_response:
|
|
273
332
|
message["context"] = m.full_response["context"]
|
|
274
333
|
messages.append(message)
|
|
275
|
-
conversation_category = template_parameters.pop("metadata", {}).get("Category")
|
|
334
|
+
conversation_category = cast(Dict[str, str], template_parameters.pop("metadata", {})).get("Category")
|
|
276
335
|
template_parameters["metadata"] = {}
|
|
277
336
|
for key in (
|
|
278
337
|
"conversation_starter",
|
|
@@ -280,6 +339,9 @@ class AdversarialSimulator:
|
|
|
280
339
|
"target_population",
|
|
281
340
|
"topic",
|
|
282
341
|
"ch_template_placeholder",
|
|
342
|
+
"chatbot_name",
|
|
343
|
+
"name",
|
|
344
|
+
"group",
|
|
283
345
|
):
|
|
284
346
|
template_parameters.pop(key, None)
|
|
285
347
|
if conversation_category:
|
|
@@ -294,54 +356,92 @@ class AdversarialSimulator:
|
|
|
294
356
|
self,
|
|
295
357
|
*,
|
|
296
358
|
target: Callable,
|
|
297
|
-
template,
|
|
298
|
-
parameters,
|
|
299
|
-
max_conversation_turns,
|
|
300
|
-
api_call_retry_limit,
|
|
301
|
-
api_call_retry_sleep_sec,
|
|
302
|
-
api_call_delay_sec,
|
|
303
|
-
language,
|
|
304
|
-
semaphore,
|
|
359
|
+
template: AdversarialTemplate,
|
|
360
|
+
parameters: TemplateParameters,
|
|
361
|
+
max_conversation_turns: int,
|
|
362
|
+
api_call_retry_limit: int,
|
|
363
|
+
api_call_retry_sleep_sec: int,
|
|
364
|
+
api_call_delay_sec: int,
|
|
365
|
+
language: SupportedLanguages,
|
|
366
|
+
semaphore: asyncio.Semaphore,
|
|
367
|
+
scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
|
|
368
|
+
simulation_id: str = "",
|
|
305
369
|
) -> List[Dict]:
|
|
306
|
-
user_bot = self._setup_bot(
|
|
370
|
+
user_bot = self._setup_bot(
|
|
371
|
+
role=ConversationRole.USER,
|
|
372
|
+
template=template,
|
|
373
|
+
parameters=parameters,
|
|
374
|
+
scenario=scenario,
|
|
375
|
+
simulation_id=simulation_id,
|
|
376
|
+
)
|
|
307
377
|
system_bot = self._setup_bot(
|
|
308
|
-
target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters
|
|
378
|
+
target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters, scenario=scenario
|
|
309
379
|
)
|
|
310
380
|
bots = [user_bot, system_bot]
|
|
311
|
-
session = get_async_http_client().with_policies(
|
|
312
|
-
retry_policy=AsyncRetryPolicy(
|
|
313
|
-
retry_total=api_call_retry_limit,
|
|
314
|
-
retry_backoff_factor=api_call_retry_sleep_sec,
|
|
315
|
-
retry_mode=RetryMode.Fixed,
|
|
316
|
-
)
|
|
317
|
-
)
|
|
318
381
|
|
|
319
|
-
async
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
382
|
+
async def run_simulation(session_obj):
|
|
383
|
+
async with semaphore:
|
|
384
|
+
_, conversation_history = await simulate_conversation(
|
|
385
|
+
bots=bots,
|
|
386
|
+
session=session_obj,
|
|
387
|
+
turn_limit=max_conversation_turns,
|
|
388
|
+
api_call_delay_sec=api_call_delay_sec,
|
|
389
|
+
language=language,
|
|
390
|
+
)
|
|
391
|
+
return conversation_history
|
|
392
|
+
|
|
393
|
+
if isinstance(self.rai_client, AIProjectClient):
|
|
394
|
+
session = self.rai_client
|
|
395
|
+
else:
|
|
396
|
+
session = get_async_http_client().with_policies(
|
|
397
|
+
retry_policy=AsyncRetryPolicy(
|
|
398
|
+
retry_total=api_call_retry_limit,
|
|
399
|
+
retry_backoff_factor=api_call_retry_sleep_sec,
|
|
400
|
+
retry_mode=RetryMode.Fixed,
|
|
401
|
+
)
|
|
326
402
|
)
|
|
327
|
-
|
|
403
|
+
conversation_history = await run_simulation(session)
|
|
404
|
+
|
|
405
|
+
return self._to_chat_protocol(
|
|
406
|
+
conversation_history=conversation_history,
|
|
407
|
+
template_parameters=cast(Dict[str, Union[str, Dict[str, str]]], parameters),
|
|
408
|
+
)
|
|
328
409
|
|
|
329
|
-
def _get_user_proxy_completion_model(
|
|
410
|
+
def _get_user_proxy_completion_model(
|
|
411
|
+
self, template_key: str, template_parameters: TemplateParameters, simulation_id: str = ""
|
|
412
|
+
) -> ProxyChatCompletionsModel:
|
|
413
|
+
endpoint_url = (
|
|
414
|
+
self.rai_client._config.endpoint + "/redTeams/simulation/chat/completions/submit"
|
|
415
|
+
if isinstance(self.rai_client, AIProjectClient)
|
|
416
|
+
else self.rai_client.simulation_submit_endpoint
|
|
417
|
+
)
|
|
330
418
|
return ProxyChatCompletionsModel(
|
|
331
419
|
name="raisvc_proxy_model",
|
|
332
420
|
template_key=template_key,
|
|
333
421
|
template_parameters=template_parameters,
|
|
334
|
-
endpoint_url=
|
|
422
|
+
endpoint_url=endpoint_url,
|
|
335
423
|
token_manager=self.token_manager,
|
|
336
424
|
api_version="2023-07-01-preview",
|
|
337
425
|
max_tokens=1200,
|
|
338
426
|
temperature=0.0,
|
|
427
|
+
simulation_id=simulation_id,
|
|
339
428
|
)
|
|
340
429
|
|
|
341
|
-
def _setup_bot(
|
|
342
|
-
|
|
430
|
+
def _setup_bot(
|
|
431
|
+
self,
|
|
432
|
+
*,
|
|
433
|
+
role: ConversationRole,
|
|
434
|
+
template: AdversarialTemplate,
|
|
435
|
+
parameters: TemplateParameters,
|
|
436
|
+
target: Optional[Callable] = None,
|
|
437
|
+
scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
|
|
438
|
+
simulation_id: str = "",
|
|
439
|
+
) -> ConversationBot:
|
|
440
|
+
if role is ConversationRole.USER:
|
|
343
441
|
model = self._get_user_proxy_completion_model(
|
|
344
|
-
template_key=template.template_name,
|
|
442
|
+
template_key=template.template_name,
|
|
443
|
+
template_parameters=parameters,
|
|
444
|
+
simulation_id=simulation_id,
|
|
345
445
|
)
|
|
346
446
|
return ConversationBot(
|
|
347
447
|
role=role,
|
|
@@ -350,35 +450,61 @@ class AdversarialSimulator:
|
|
|
350
450
|
instantiation_parameters=parameters,
|
|
351
451
|
)
|
|
352
452
|
|
|
353
|
-
if role
|
|
453
|
+
if role is ConversationRole.ASSISTANT:
|
|
454
|
+
if target is None:
|
|
455
|
+
msg = "Cannot setup system bot. Target is None"
|
|
354
456
|
|
|
355
|
-
|
|
356
|
-
|
|
457
|
+
raise EvaluationException(
|
|
458
|
+
message=msg,
|
|
459
|
+
internal_message=msg,
|
|
460
|
+
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
461
|
+
error_category=ErrorCategory.INVALID_VALUE,
|
|
462
|
+
blame=ErrorBlame.SYSTEM_ERROR,
|
|
463
|
+
)
|
|
464
|
+
|
|
465
|
+
class DummyModel:
|
|
466
|
+
def __init__(self):
|
|
467
|
+
self.name = "dummy_model"
|
|
468
|
+
|
|
469
|
+
def __call__(self) -> None:
|
|
470
|
+
pass
|
|
471
|
+
|
|
472
|
+
if scenario in [
|
|
473
|
+
_UnstableAdversarialScenario.ADVERSARIAL_IMAGE_GEN,
|
|
474
|
+
_UnstableAdversarialScenario.ADVERSARIAL_IMAGE_MULTIMODAL,
|
|
475
|
+
]:
|
|
476
|
+
return MultiModalConversationBot(
|
|
477
|
+
callback=target,
|
|
478
|
+
role=role,
|
|
479
|
+
model=DummyModel(),
|
|
480
|
+
user_template=str(template),
|
|
481
|
+
user_template_parameters=parameters,
|
|
482
|
+
rai_client=self.rai_client,
|
|
483
|
+
conversation_template="",
|
|
484
|
+
instantiation_parameters={},
|
|
485
|
+
)
|
|
357
486
|
|
|
358
|
-
dummy_model.name = "dummy_model"
|
|
359
487
|
return CallbackConversationBot(
|
|
360
488
|
callback=target,
|
|
361
489
|
role=role,
|
|
362
|
-
model=
|
|
490
|
+
model=DummyModel(),
|
|
363
491
|
user_template=str(template),
|
|
364
492
|
user_template_parameters=parameters,
|
|
365
493
|
conversation_template="",
|
|
366
494
|
instantiation_parameters={},
|
|
367
495
|
)
|
|
368
|
-
return ConversationBot(
|
|
369
|
-
role=role,
|
|
370
|
-
model=model,
|
|
371
|
-
conversation_template=template,
|
|
372
|
-
instantiation_parameters=parameters,
|
|
373
|
-
)
|
|
374
496
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
497
|
+
msg = "Invalid value for enum ConversationRole. This should never happen."
|
|
498
|
+
raise EvaluationException(
|
|
499
|
+
message=msg,
|
|
500
|
+
internal_message=msg,
|
|
501
|
+
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
502
|
+
category=ErrorCategory.INVALID_VALUE,
|
|
503
|
+
blame=ErrorBlame.SYSTEM_ERROR,
|
|
504
|
+
)
|
|
381
505
|
|
|
506
|
+
def _add_jailbreak_parameter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
|
|
507
|
+
parameters["jailbreak_string"] = to_join
|
|
382
508
|
return parameters
|
|
383
509
|
|
|
384
510
|
def call_sync(
|
|
@@ -5,7 +5,17 @@ from enum import Enum
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class SupportedLanguages(Enum):
|
|
8
|
-
"""Supported languages for evaluation, using ISO standard language codes.
|
|
8
|
+
"""Supported languages for evaluation, using ISO standard language codes.
|
|
9
|
+
|
|
10
|
+
.. admonition:: Example:
|
|
11
|
+
|
|
12
|
+
.. literalinclude:: ../samples/evaluation_samples_simulate.py
|
|
13
|
+
:start-after: [START supported_languages]
|
|
14
|
+
:end-before: [END supported_languages]
|
|
15
|
+
:language: python
|
|
16
|
+
:dedent: 8
|
|
17
|
+
:caption: Run the AdversarialSimulator with Simplified Chinese language support for evaluation.
|
|
18
|
+
"""
|
|
9
19
|
|
|
10
20
|
Spanish = "es"
|
|
11
21
|
Italian = "it"
|
|
@@ -15,3 +25,4 @@ class SupportedLanguages(Enum):
|
|
|
15
25
|
Portuguese = "pt"
|
|
16
26
|
Japanese = "ja"
|
|
17
27
|
English = "en"
|
|
28
|
+
Korean = "ko"
|