azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +83 -14
- azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/_common/constants.py +124 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +578 -69
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +505 -27
- azure/ai/evaluation/_constants.py +148 -0
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
- azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
- azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
- azure/ai/evaluation/_evaluate/_utils.py +237 -42
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
- azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +427 -29
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
- azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
- azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
- azure/ai/evaluation/_exceptions.py +24 -1
- azure/ai/evaluation/_http_utils.py +7 -5
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +26 -0
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
- azure/ai/evaluation/simulator/_constants.py +1 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
- azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
- azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- azure/ai/evaluation/simulator/_simulator.py +43 -19
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/METADATA +366 -27
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info/licenses}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
|
@@ -6,30 +6,38 @@
|
|
|
6
6
|
import asyncio
|
|
7
7
|
import logging
|
|
8
8
|
import random
|
|
9
|
-
from typing import Any, Callable, Dict, List,
|
|
10
|
-
|
|
9
|
+
from typing import Any, Callable, Dict, List, Optional, Union, cast
|
|
10
|
+
import uuid
|
|
11
|
+
import warnings
|
|
11
12
|
|
|
12
13
|
from tqdm import tqdm
|
|
13
14
|
|
|
14
15
|
from azure.ai.evaluation._common._experimental import experimental
|
|
15
|
-
from azure.ai.evaluation._common.utils import validate_azure_ai_project
|
|
16
|
+
from azure.ai.evaluation._common.utils import validate_azure_ai_project, is_onedp_project
|
|
17
|
+
from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
|
|
16
18
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
17
19
|
from azure.ai.evaluation._http_utils import get_async_http_client
|
|
18
20
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
19
|
-
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
21
|
+
from azure.ai.evaluation.simulator import AdversarialScenario, AdversarialScenarioJailbreak
|
|
20
22
|
from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
|
|
23
|
+
from azure.ai.evaluation._constants import TokenScope
|
|
21
24
|
from azure.core.credentials import TokenCredential
|
|
22
25
|
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
23
26
|
|
|
24
27
|
from ._constants import SupportedLanguages
|
|
25
|
-
from ._conversation import
|
|
28
|
+
from ._conversation import (
|
|
29
|
+
CallbackConversationBot,
|
|
30
|
+
MultiModalConversationBot,
|
|
31
|
+
ConversationBot,
|
|
32
|
+
ConversationRole,
|
|
33
|
+
ConversationTurn,
|
|
34
|
+
)
|
|
26
35
|
from ._conversation._conversation import simulate_conversation
|
|
27
36
|
from ._model_tools import (
|
|
28
37
|
AdversarialTemplateHandler,
|
|
29
38
|
ManagedIdentityAPITokenManager,
|
|
30
39
|
ProxyChatCompletionsModel,
|
|
31
40
|
RAIClient,
|
|
32
|
-
TokenScope,
|
|
33
41
|
)
|
|
34
42
|
from ._model_tools._template_handler import AdversarialTemplate, TemplateParameters
|
|
35
43
|
from ._utils import JsonLineList
|
|
@@ -42,9 +50,9 @@ class AdversarialSimulator:
|
|
|
42
50
|
"""
|
|
43
51
|
Initializes the adversarial simulator with a project scope.
|
|
44
52
|
|
|
45
|
-
:param azure_ai_project: The
|
|
46
|
-
name.
|
|
47
|
-
:type azure_ai_project:
|
|
53
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
54
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
55
|
+
:type azure_ai_project: Union[str, AzureAIProject]
|
|
48
56
|
:param credential: The credential for connecting to Azure AI project.
|
|
49
57
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
50
58
|
|
|
@@ -59,26 +67,45 @@ class AdversarialSimulator:
|
|
|
59
67
|
2 conversation turns each (4 messages per result).
|
|
60
68
|
"""
|
|
61
69
|
|
|
62
|
-
def __init__(self, *, azure_ai_project: AzureAIProject, credential: TokenCredential):
|
|
70
|
+
def __init__(self, *, azure_ai_project: Union[str, AzureAIProject], credential: TokenCredential):
|
|
63
71
|
"""Constructor."""
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
72
|
-
category=e.category,
|
|
73
|
-
blame=e.blame,
|
|
74
|
-
) from e
|
|
75
|
-
|
|
76
|
-
self.token_manager = ManagedIdentityAPITokenManager(
|
|
77
|
-
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
78
|
-
logger=logging.getLogger("AdversarialSimulator"),
|
|
79
|
-
credential=cast(TokenCredential, credential),
|
|
72
|
+
warnings.warn(
|
|
73
|
+
"DEPRECATION NOTE: Azure AI Evaluation SDK has discontinued active development on the AdversarialSimulator class."
|
|
74
|
+
+ " While existing functionality remains available in preview, it is no longer recommended for production workloads or future integration. "
|
|
75
|
+
+ "We recommend users migrate to the AI Red Teaming Agent for future use as it supports full parity of functionality."
|
|
76
|
+
+ " See https://aka.ms/airedteamingagent-sample for details on AI Red Teaming Agent.",
|
|
77
|
+
DeprecationWarning,
|
|
78
|
+
stacklevel=2,
|
|
80
79
|
)
|
|
81
|
-
|
|
80
|
+
|
|
81
|
+
if is_onedp_project(azure_ai_project):
|
|
82
|
+
self.azure_ai_project = azure_ai_project
|
|
83
|
+
self.credential = cast(TokenCredential, credential)
|
|
84
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
85
|
+
token_scope=TokenScope.COGNITIVE_SERVICES_MANAGEMENT,
|
|
86
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
87
|
+
credential=self.credential,
|
|
88
|
+
)
|
|
89
|
+
self.rai_client = AIProjectClient(endpoint=azure_ai_project, credential=credential)
|
|
90
|
+
else:
|
|
91
|
+
try:
|
|
92
|
+
self.azure_ai_project = validate_azure_ai_project(azure_ai_project)
|
|
93
|
+
except EvaluationException as e:
|
|
94
|
+
raise EvaluationException(
|
|
95
|
+
message=e.message,
|
|
96
|
+
internal_message=e.internal_message,
|
|
97
|
+
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
98
|
+
category=e.category,
|
|
99
|
+
blame=e.blame,
|
|
100
|
+
) from e
|
|
101
|
+
self.credential = cast(TokenCredential, credential)
|
|
102
|
+
self.token_manager = ManagedIdentityAPITokenManager(
|
|
103
|
+
token_scope=TokenScope.DEFAULT_AZURE_MANAGEMENT,
|
|
104
|
+
logger=logging.getLogger("AdversarialSimulator"),
|
|
105
|
+
credential=self.credential,
|
|
106
|
+
)
|
|
107
|
+
self.rai_client = RAIClient(azure_ai_project=self.azure_ai_project, token_manager=self.token_manager)
|
|
108
|
+
|
|
82
109
|
self.adversarial_template_handler = AdversarialTemplateHandler(
|
|
83
110
|
azure_ai_project=self.azure_ai_project, rai_client=self.rai_client
|
|
84
111
|
)
|
|
@@ -182,6 +209,14 @@ class AdversarialSimulator:
|
|
|
182
209
|
)
|
|
183
210
|
self._ensure_service_dependencies()
|
|
184
211
|
templates = await self.adversarial_template_handler._get_content_harm_template_collections(scenario.value)
|
|
212
|
+
if len(templates) == 0:
|
|
213
|
+
raise EvaluationException(
|
|
214
|
+
message="Templates not found. Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
|
|
215
|
+
internal_message="Please check https://aka.ms/azureaiadvsimulator-regionsupport for region support.",
|
|
216
|
+
target=ErrorTarget.ADVERSARIAL_SIMULATOR,
|
|
217
|
+
)
|
|
218
|
+
simulation_id = str(uuid.uuid4())
|
|
219
|
+
logger.warning("Use simulation_id to help debug the issue: %s", str(simulation_id))
|
|
185
220
|
concurrent_async_task = min(concurrent_async_task, 1000)
|
|
186
221
|
semaphore = asyncio.Semaphore(concurrent_async_task)
|
|
187
222
|
sim_results = []
|
|
@@ -198,46 +233,83 @@ class AdversarialSimulator:
|
|
|
198
233
|
total_tasks = min(total_tasks, max_simulation_results)
|
|
199
234
|
_jailbreak_type = kwargs.get("_jailbreak_type", None)
|
|
200
235
|
if _jailbreak_type:
|
|
201
|
-
|
|
236
|
+
if isinstance(self.rai_client, RAIClient):
|
|
237
|
+
jailbreak_dataset = await self.rai_client.get_jailbreaks_dataset(type=_jailbreak_type)
|
|
238
|
+
elif isinstance(self.rai_client, AIProjectClient):
|
|
239
|
+
jailbreak_dataset = self.rai_client.red_teams.get_jail_break_dataset_with_type(type=_jailbreak_type)
|
|
202
240
|
progress_bar = tqdm(
|
|
203
241
|
total=total_tasks,
|
|
204
242
|
desc="generating jailbreak simulations" if _jailbreak_type else "generating simulations",
|
|
205
243
|
ncols=100,
|
|
206
244
|
unit="simulations",
|
|
207
245
|
)
|
|
208
|
-
|
|
209
246
|
if randomize_order:
|
|
210
247
|
# The template parameter lists are persistent across sim runs within a session,
|
|
211
248
|
# So randomize a the selection instead of the parameter list directly,
|
|
212
249
|
# or a potentially large deep copy.
|
|
213
250
|
if randomization_seed is not None:
|
|
214
|
-
random
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
251
|
+
# Create a local random instance to avoid polluting global state
|
|
252
|
+
local_random = random.Random(randomization_seed)
|
|
253
|
+
local_random.shuffle(templates)
|
|
254
|
+
else:
|
|
255
|
+
random.shuffle(templates)
|
|
256
|
+
|
|
257
|
+
# Prepare task parameters based on scenario - but use a single append call for all scenarios
|
|
258
|
+
tasks = []
|
|
259
|
+
template_parameter_pairs = []
|
|
260
|
+
|
|
261
|
+
if scenario == AdversarialScenario.ADVERSARIAL_CONVERSATION:
|
|
262
|
+
# For ADVERSARIAL_CONVERSATION, flatten the parameters
|
|
263
|
+
for i, template in enumerate(templates):
|
|
264
|
+
if not template.template_parameters:
|
|
265
|
+
continue
|
|
266
|
+
for parameter in template.template_parameters:
|
|
267
|
+
template_parameter_pairs.append((template, parameter))
|
|
268
|
+
else:
|
|
269
|
+
# Use original logic for other scenarios - zip parameters
|
|
270
|
+
parameter_lists = [t.template_parameters for t in templates]
|
|
271
|
+
zipped_parameters = list(zip(*parameter_lists))
|
|
272
|
+
|
|
273
|
+
for param_group in zipped_parameters:
|
|
274
|
+
for template, parameter in zip(templates, param_group):
|
|
275
|
+
template_parameter_pairs.append((template, parameter))
|
|
276
|
+
|
|
277
|
+
# Limit to max_simulation_results if needed
|
|
278
|
+
if len(template_parameter_pairs) > max_simulation_results:
|
|
279
|
+
template_parameter_pairs = template_parameter_pairs[
|
|
280
|
+
:max_simulation_results
|
|
281
|
+
] # Create a seeded random instance for jailbreak selection if randomization_seed is provided
|
|
282
|
+
jailbreak_random = None
|
|
283
|
+
if _jailbreak_type == "upia" and randomization_seed is not None:
|
|
284
|
+
jailbreak_random = random.Random(randomization_seed)
|
|
285
|
+
|
|
286
|
+
# Single task append loop for all scenarios
|
|
287
|
+
for template, parameter in template_parameter_pairs:
|
|
288
|
+
if _jailbreak_type == "upia":
|
|
289
|
+
if jailbreak_random is not None:
|
|
290
|
+
selected_jailbreak = jailbreak_random.choice(jailbreak_dataset)
|
|
291
|
+
else:
|
|
292
|
+
selected_jailbreak = random.choice(jailbreak_dataset)
|
|
293
|
+
parameter = self._add_jailbreak_parameter(parameter, selected_jailbreak)
|
|
294
|
+
|
|
295
|
+
tasks.append(
|
|
296
|
+
asyncio.create_task(
|
|
297
|
+
self._simulate_async(
|
|
298
|
+
target=target,
|
|
299
|
+
template=template,
|
|
300
|
+
parameters=parameter,
|
|
301
|
+
max_conversation_turns=max_conversation_turns,
|
|
302
|
+
api_call_retry_limit=api_call_retry_limit,
|
|
303
|
+
api_call_retry_sleep_sec=api_call_retry_sleep_sec,
|
|
304
|
+
api_call_delay_sec=api_call_delay_sec,
|
|
305
|
+
language=language,
|
|
306
|
+
semaphore=semaphore,
|
|
307
|
+
scenario=scenario,
|
|
308
|
+
simulation_id=simulation_id,
|
|
235
309
|
)
|
|
236
310
|
)
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
if len(tasks) >= max_simulation_results:
|
|
240
|
-
break
|
|
311
|
+
)
|
|
312
|
+
|
|
241
313
|
for task in asyncio.as_completed(tasks):
|
|
242
314
|
sim_results.append(await task)
|
|
243
315
|
progress_bar.update(1)
|
|
@@ -292,28 +364,43 @@ class AdversarialSimulator:
|
|
|
292
364
|
api_call_delay_sec: int,
|
|
293
365
|
language: SupportedLanguages,
|
|
294
366
|
semaphore: asyncio.Semaphore,
|
|
367
|
+
scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
|
|
368
|
+
simulation_id: str = "",
|
|
295
369
|
) -> List[Dict]:
|
|
296
|
-
user_bot = self._setup_bot(
|
|
370
|
+
user_bot = self._setup_bot(
|
|
371
|
+
role=ConversationRole.USER,
|
|
372
|
+
template=template,
|
|
373
|
+
parameters=parameters,
|
|
374
|
+
scenario=scenario,
|
|
375
|
+
simulation_id=simulation_id,
|
|
376
|
+
)
|
|
297
377
|
system_bot = self._setup_bot(
|
|
298
|
-
target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters
|
|
378
|
+
target=target, role=ConversationRole.ASSISTANT, template=template, parameters=parameters, scenario=scenario
|
|
299
379
|
)
|
|
300
380
|
bots = [user_bot, system_bot]
|
|
301
|
-
session = get_async_http_client().with_policies(
|
|
302
|
-
retry_policy=AsyncRetryPolicy(
|
|
303
|
-
retry_total=api_call_retry_limit,
|
|
304
|
-
retry_backoff_factor=api_call_retry_sleep_sec,
|
|
305
|
-
retry_mode=RetryMode.Fixed,
|
|
306
|
-
)
|
|
307
|
-
)
|
|
308
381
|
|
|
309
|
-
async
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
382
|
+
async def run_simulation(session_obj):
|
|
383
|
+
async with semaphore:
|
|
384
|
+
_, conversation_history = await simulate_conversation(
|
|
385
|
+
bots=bots,
|
|
386
|
+
session=session_obj,
|
|
387
|
+
turn_limit=max_conversation_turns,
|
|
388
|
+
api_call_delay_sec=api_call_delay_sec,
|
|
389
|
+
language=language,
|
|
390
|
+
)
|
|
391
|
+
return conversation_history
|
|
392
|
+
|
|
393
|
+
if isinstance(self.rai_client, AIProjectClient):
|
|
394
|
+
session = self.rai_client
|
|
395
|
+
else:
|
|
396
|
+
session = get_async_http_client().with_policies(
|
|
397
|
+
retry_policy=AsyncRetryPolicy(
|
|
398
|
+
retry_total=api_call_retry_limit,
|
|
399
|
+
retry_backoff_factor=api_call_retry_sleep_sec,
|
|
400
|
+
retry_mode=RetryMode.Fixed,
|
|
401
|
+
)
|
|
316
402
|
)
|
|
403
|
+
conversation_history = await run_simulation(session)
|
|
317
404
|
|
|
318
405
|
return self._to_chat_protocol(
|
|
319
406
|
conversation_history=conversation_history,
|
|
@@ -321,17 +408,23 @@ class AdversarialSimulator:
|
|
|
321
408
|
)
|
|
322
409
|
|
|
323
410
|
def _get_user_proxy_completion_model(
|
|
324
|
-
self, template_key: str, template_parameters: TemplateParameters
|
|
411
|
+
self, template_key: str, template_parameters: TemplateParameters, simulation_id: str = ""
|
|
325
412
|
) -> ProxyChatCompletionsModel:
|
|
413
|
+
endpoint_url = (
|
|
414
|
+
self.rai_client._config.endpoint + "/redTeams/simulation/chat/completions/submit"
|
|
415
|
+
if isinstance(self.rai_client, AIProjectClient)
|
|
416
|
+
else self.rai_client.simulation_submit_endpoint
|
|
417
|
+
)
|
|
326
418
|
return ProxyChatCompletionsModel(
|
|
327
419
|
name="raisvc_proxy_model",
|
|
328
420
|
template_key=template_key,
|
|
329
421
|
template_parameters=template_parameters,
|
|
330
|
-
endpoint_url=
|
|
422
|
+
endpoint_url=endpoint_url,
|
|
331
423
|
token_manager=self.token_manager,
|
|
332
424
|
api_version="2023-07-01-preview",
|
|
333
425
|
max_tokens=1200,
|
|
334
426
|
temperature=0.0,
|
|
427
|
+
simulation_id=simulation_id,
|
|
335
428
|
)
|
|
336
429
|
|
|
337
430
|
def _setup_bot(
|
|
@@ -341,10 +434,14 @@ class AdversarialSimulator:
|
|
|
341
434
|
template: AdversarialTemplate,
|
|
342
435
|
parameters: TemplateParameters,
|
|
343
436
|
target: Optional[Callable] = None,
|
|
437
|
+
scenario: Union[AdversarialScenario, AdversarialScenarioJailbreak],
|
|
438
|
+
simulation_id: str = "",
|
|
344
439
|
) -> ConversationBot:
|
|
345
440
|
if role is ConversationRole.USER:
|
|
346
441
|
model = self._get_user_proxy_completion_model(
|
|
347
|
-
template_key=template.template_name,
|
|
442
|
+
template_key=template.template_name,
|
|
443
|
+
template_parameters=parameters,
|
|
444
|
+
simulation_id=simulation_id,
|
|
348
445
|
)
|
|
349
446
|
return ConversationBot(
|
|
350
447
|
role=role,
|
|
@@ -372,6 +469,21 @@ class AdversarialSimulator:
|
|
|
372
469
|
def __call__(self) -> None:
|
|
373
470
|
pass
|
|
374
471
|
|
|
472
|
+
if scenario in [
|
|
473
|
+
_UnstableAdversarialScenario.ADVERSARIAL_IMAGE_GEN,
|
|
474
|
+
_UnstableAdversarialScenario.ADVERSARIAL_IMAGE_MULTIMODAL,
|
|
475
|
+
]:
|
|
476
|
+
return MultiModalConversationBot(
|
|
477
|
+
callback=target,
|
|
478
|
+
role=role,
|
|
479
|
+
model=DummyModel(),
|
|
480
|
+
user_template=str(template),
|
|
481
|
+
user_template_parameters=parameters,
|
|
482
|
+
rai_client=self.rai_client,
|
|
483
|
+
conversation_template="",
|
|
484
|
+
instantiation_parameters={},
|
|
485
|
+
)
|
|
486
|
+
|
|
375
487
|
return CallbackConversationBot(
|
|
376
488
|
callback=target,
|
|
377
489
|
role=role,
|
|
@@ -391,13 +503,8 @@ class AdversarialSimulator:
|
|
|
391
503
|
blame=ErrorBlame.SYSTEM_ERROR,
|
|
392
504
|
)
|
|
393
505
|
|
|
394
|
-
def
|
|
395
|
-
|
|
396
|
-
if key in parameters.keys():
|
|
397
|
-
parameters[key] = f"{to_join} {parameters[key]}"
|
|
398
|
-
else:
|
|
399
|
-
parameters[key] = to_join
|
|
400
|
-
|
|
506
|
+
def _add_jailbreak_parameter(self, parameters: TemplateParameters, to_join: str) -> TemplateParameters:
|
|
507
|
+
parameters["jailbreak_string"] = to_join
|
|
401
508
|
return parameters
|
|
402
509
|
|
|
403
510
|
def call_sync(
|
|
@@ -8,13 +8,14 @@ import logging
|
|
|
8
8
|
import time
|
|
9
9
|
from dataclasses import dataclass
|
|
10
10
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
|
|
11
|
-
|
|
11
|
+
import base64
|
|
12
|
+
import re
|
|
12
13
|
import jinja2
|
|
13
14
|
|
|
14
15
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
15
16
|
from azure.ai.evaluation._http_utils import AsyncHttpPipeline
|
|
16
|
-
|
|
17
|
-
from
|
|
17
|
+
from .._model_tools import LLMBase, OpenAIChatCompletionsModel, RAIClient
|
|
18
|
+
from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
|
|
18
19
|
from .._model_tools._template_handler import TemplateParameters
|
|
19
20
|
from .constants import ConversationRole
|
|
20
21
|
|
|
@@ -136,7 +137,7 @@ class ConversationBot:
|
|
|
136
137
|
self.conversation_starter = jinja2.Template(
|
|
137
138
|
conversation_starter_content, undefined=jinja2.StrictUndefined
|
|
138
139
|
)
|
|
139
|
-
except jinja2.exceptions.TemplateSyntaxError: # noqa: F841
|
|
140
|
+
except jinja2.exceptions.TemplateSyntaxError as e: # noqa: F841
|
|
140
141
|
self.conversation_starter = conversation_starter_content
|
|
141
142
|
else:
|
|
142
143
|
self.logger.info(
|
|
@@ -145,10 +146,11 @@ class ConversationBot:
|
|
|
145
146
|
|
|
146
147
|
async def generate_response(
|
|
147
148
|
self,
|
|
148
|
-
session: AsyncHttpPipeline,
|
|
149
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
149
150
|
conversation_history: List[ConversationTurn],
|
|
150
151
|
max_history: int,
|
|
151
152
|
turn_number: int = 0,
|
|
153
|
+
session_state: Optional[Dict[str, Any]] = None,
|
|
152
154
|
) -> Tuple[dict, dict, float, dict]:
|
|
153
155
|
"""
|
|
154
156
|
Prompt the ConversationBot for a response.
|
|
@@ -175,6 +177,9 @@ class ConversationBot:
|
|
|
175
177
|
samples = [self.conversation_starter.render(**self.persona_template_args)]
|
|
176
178
|
else:
|
|
177
179
|
samples = [self.conversation_starter]
|
|
180
|
+
jailbreak_string = self.persona_template_args.get("jailbreak_string", None)
|
|
181
|
+
if jailbreak_string:
|
|
182
|
+
samples = [f"{jailbreak_string} {samples[0]}"]
|
|
178
183
|
time_taken = 0
|
|
179
184
|
|
|
180
185
|
finish_reason = ["stop"]
|
|
@@ -251,10 +256,11 @@ class CallbackConversationBot(ConversationBot):
|
|
|
251
256
|
|
|
252
257
|
async def generate_response(
|
|
253
258
|
self,
|
|
254
|
-
session: AsyncHttpPipeline,
|
|
259
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
255
260
|
conversation_history: List[Any],
|
|
256
261
|
max_history: int,
|
|
257
262
|
turn_number: int = 0,
|
|
263
|
+
session_state: Optional[Dict[str, Any]] = None,
|
|
258
264
|
) -> Tuple[dict, dict, float, dict]:
|
|
259
265
|
chat_protocol_message = self._to_chat_protocol(
|
|
260
266
|
self.user_template, conversation_history, self.user_template_parameters
|
|
@@ -262,7 +268,7 @@ class CallbackConversationBot(ConversationBot):
|
|
|
262
268
|
msg_copy = copy.deepcopy(chat_protocol_message)
|
|
263
269
|
result = {}
|
|
264
270
|
start_time = time.time()
|
|
265
|
-
result = await self.callback(msg_copy)
|
|
271
|
+
result = await self.callback(msg_copy, session_state=session_state)
|
|
266
272
|
end_time = time.time()
|
|
267
273
|
if not result:
|
|
268
274
|
result = {
|
|
@@ -271,8 +277,6 @@ class CallbackConversationBot(ConversationBot):
|
|
|
271
277
|
"id": None,
|
|
272
278
|
"template_parameters": {},
|
|
273
279
|
}
|
|
274
|
-
self.logger.info("Using user provided callback returning response.")
|
|
275
|
-
|
|
276
280
|
time_taken = end_time - start_time
|
|
277
281
|
try:
|
|
278
282
|
response = {
|
|
@@ -290,8 +294,6 @@ class CallbackConversationBot(ConversationBot):
|
|
|
290
294
|
blame=ErrorBlame.USER_ERROR,
|
|
291
295
|
) from exc
|
|
292
296
|
|
|
293
|
-
self.logger.info("Parsed callback response")
|
|
294
|
-
|
|
295
297
|
return response, {}, time_taken, result
|
|
296
298
|
|
|
297
299
|
# Bug 3354264: template is unused in the method - is this intentional?
|
|
@@ -308,9 +310,134 @@ class CallbackConversationBot(ConversationBot):
|
|
|
308
310
|
}
|
|
309
311
|
|
|
310
312
|
|
|
313
|
+
class MultiModalConversationBot(ConversationBot):
|
|
314
|
+
"""MultiModal Conversation bot that uses a user provided callback to generate responses.
|
|
315
|
+
|
|
316
|
+
:param callback: The callback function to use to generate responses.
|
|
317
|
+
:type callback: Callable
|
|
318
|
+
:param user_template: The template to use for the request.
|
|
319
|
+
:type user_template: str
|
|
320
|
+
:param user_template_parameters: The template parameters to use for the request.
|
|
321
|
+
:type user_template_parameters: Dict
|
|
322
|
+
:param args: Optional arguments to pass to the parent class.
|
|
323
|
+
:type args: Any
|
|
324
|
+
:param kwargs: Optional keyword arguments to pass to the parent class.
|
|
325
|
+
:type kwargs: Any
|
|
326
|
+
"""
|
|
327
|
+
|
|
328
|
+
def __init__(
|
|
329
|
+
self,
|
|
330
|
+
callback: Callable,
|
|
331
|
+
user_template: str,
|
|
332
|
+
user_template_parameters: TemplateParameters,
|
|
333
|
+
rai_client: Union[RAIClient, AIProjectClient],
|
|
334
|
+
*args,
|
|
335
|
+
**kwargs,
|
|
336
|
+
) -> None:
|
|
337
|
+
self.callback = callback
|
|
338
|
+
self.user_template = user_template
|
|
339
|
+
self.user_template_parameters = user_template_parameters
|
|
340
|
+
self.rai_client = rai_client
|
|
341
|
+
|
|
342
|
+
super().__init__(*args, **kwargs)
|
|
343
|
+
|
|
344
|
+
async def generate_response(
|
|
345
|
+
self,
|
|
346
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
347
|
+
conversation_history: List[Any],
|
|
348
|
+
max_history: int,
|
|
349
|
+
turn_number: int = 0,
|
|
350
|
+
session_state: Optional[Dict[str, Any]] = None,
|
|
351
|
+
) -> Tuple[dict, dict, float, dict]:
|
|
352
|
+
previous_prompt = conversation_history[-1]
|
|
353
|
+
chat_protocol_message = await self._to_chat_protocol(conversation_history, self.user_template_parameters)
|
|
354
|
+
|
|
355
|
+
# replace prompt with {image.jpg} tags with image content data.
|
|
356
|
+
conversation_history.pop()
|
|
357
|
+
conversation_history.append(
|
|
358
|
+
ConversationTurn(
|
|
359
|
+
role=previous_prompt.role,
|
|
360
|
+
name=previous_prompt.name,
|
|
361
|
+
message=chat_protocol_message["messages"][0]["content"],
|
|
362
|
+
full_response=previous_prompt.full_response,
|
|
363
|
+
request=chat_protocol_message,
|
|
364
|
+
)
|
|
365
|
+
)
|
|
366
|
+
msg_copy = copy.deepcopy(chat_protocol_message)
|
|
367
|
+
result = {}
|
|
368
|
+
start_time = time.time()
|
|
369
|
+
result = await self.callback(msg_copy)
|
|
370
|
+
end_time = time.time()
|
|
371
|
+
if not result:
|
|
372
|
+
result = {
|
|
373
|
+
"messages": [{"content": "Callback did not return a response.", "role": "assistant"}],
|
|
374
|
+
"finish_reason": ["stop"],
|
|
375
|
+
"id": None,
|
|
376
|
+
"template_parameters": {},
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
time_taken = end_time - start_time
|
|
380
|
+
try:
|
|
381
|
+
response = {
|
|
382
|
+
"samples": [result["messages"][-1]["content"]],
|
|
383
|
+
"finish_reason": ["stop"],
|
|
384
|
+
"id": None,
|
|
385
|
+
}
|
|
386
|
+
except Exception as exc:
|
|
387
|
+
msg = "User provided callback does not conform to chat protocol standard."
|
|
388
|
+
raise EvaluationException(
|
|
389
|
+
message=msg,
|
|
390
|
+
internal_message=msg,
|
|
391
|
+
target=ErrorTarget.CALLBACK_CONVERSATION_BOT,
|
|
392
|
+
category=ErrorCategory.INVALID_VALUE,
|
|
393
|
+
blame=ErrorBlame.USER_ERROR,
|
|
394
|
+
) from exc
|
|
395
|
+
|
|
396
|
+
return response, chat_protocol_message, time_taken, result
|
|
397
|
+
|
|
398
|
+
async def _to_chat_protocol(self, conversation_history, template_parameters): # pylint: disable=unused-argument
|
|
399
|
+
messages = []
|
|
400
|
+
|
|
401
|
+
for _, m in enumerate(conversation_history):
|
|
402
|
+
if "image:" in m.message:
|
|
403
|
+
content = await self._to_multi_modal_content(m.message)
|
|
404
|
+
messages.append({"content": content, "role": m.role.value})
|
|
405
|
+
else:
|
|
406
|
+
messages.append({"content": m.message, "role": m.role.value})
|
|
407
|
+
|
|
408
|
+
return {
|
|
409
|
+
"template_parameters": template_parameters,
|
|
410
|
+
"messages": messages,
|
|
411
|
+
"$schema": "http://azureml/sdk-2-0/ChatConversation.json",
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
async def _to_multi_modal_content(self, text: str) -> list:
|
|
415
|
+
split_text = re.findall(r"[^{}]+|\{[^{}]*\}", text)
|
|
416
|
+
messages = [
|
|
417
|
+
text.strip("{}").replace("image:", "").strip() if text.startswith("{") else text for text in split_text
|
|
418
|
+
]
|
|
419
|
+
contents = []
|
|
420
|
+
for msg in messages:
|
|
421
|
+
if msg.startswith("image_understanding/"):
|
|
422
|
+
if isinstance(self.rai_client, RAIClient):
|
|
423
|
+
encoded_image = await self.rai_client.get_image_data(msg)
|
|
424
|
+
else:
|
|
425
|
+
response = self.rai_client.red_teams.get_template_parameters_image(path=msg, stream="true")
|
|
426
|
+
image_data = b"".join(response)
|
|
427
|
+
encoded_image = base64.b64encode(image_data).decode("utf-8")
|
|
428
|
+
|
|
429
|
+
contents.append(
|
|
430
|
+
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
|
|
431
|
+
)
|
|
432
|
+
else:
|
|
433
|
+
contents.append({"type": "text", "text": msg})
|
|
434
|
+
return contents
|
|
435
|
+
|
|
436
|
+
|
|
311
437
|
__all__ = [
|
|
312
438
|
"ConversationRole",
|
|
313
439
|
"ConversationBot",
|
|
314
440
|
"CallbackConversationBot",
|
|
441
|
+
"MultiModalConversationBot",
|
|
315
442
|
"ConversationTurn",
|
|
316
443
|
]
|
|
@@ -9,9 +9,9 @@ from typing import Callable, Dict, List, Optional, Tuple, Union
|
|
|
9
9
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
10
10
|
from azure.ai.evaluation.simulator._constants import SupportedLanguages
|
|
11
11
|
from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
|
|
12
|
-
|
|
13
12
|
from ..._http_utils import AsyncHttpPipeline
|
|
14
13
|
from . import ConversationBot, ConversationTurn
|
|
14
|
+
from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
|
|
@@ -73,7 +73,7 @@ def is_closing_message_helper(response: str) -> bool:
|
|
|
73
73
|
async def simulate_conversation(
|
|
74
74
|
*,
|
|
75
75
|
bots: List[ConversationBot],
|
|
76
|
-
session: AsyncHttpPipeline,
|
|
76
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
77
77
|
language: SupportedLanguages,
|
|
78
78
|
stopping_criteria: Callable[[str], bool] = is_closing_message,
|
|
79
79
|
turn_limit: int = 10,
|
|
@@ -102,6 +102,7 @@ async def simulate_conversation(
|
|
|
102
102
|
:rtype: Tuple[Optional[str], List[ConversationTurn]]
|
|
103
103
|
"""
|
|
104
104
|
|
|
105
|
+
session_state = {}
|
|
105
106
|
# Read the first prompt.
|
|
106
107
|
(first_response, request, _, full_response) = await bots[0].generate_response(
|
|
107
108
|
session=session,
|
|
@@ -150,7 +151,10 @@ async def simulate_conversation(
|
|
|
150
151
|
conversation_history=conversation_history,
|
|
151
152
|
max_history=history_limit,
|
|
152
153
|
turn_number=current_turn,
|
|
154
|
+
session_state=session_state,
|
|
153
155
|
)
|
|
156
|
+
if "session_state" in full_response and full_response["session_state"] is not None:
|
|
157
|
+
session_state.update(full_response["session_state"])
|
|
154
158
|
|
|
155
159
|
# check if conversation id is null, which means conversation starter was used. use id from next turn
|
|
156
160
|
if conversation_id is None and "id" in response:
|
|
@@ -12,7 +12,7 @@ OUTPUT_FILE = "openai_api_response.jsonl"
|
|
|
12
12
|
|
|
13
13
|
# Azure endpoint constants
|
|
14
14
|
AZUREML_TOKEN_SCOPE = "https://ml.azure.com"
|
|
15
|
-
COGNITIVE_SERVICES_TOKEN_SCOPE = "https://
|
|
15
|
+
COGNITIVE_SERVICES_TOKEN_SCOPE = "https://ai.azure.com/"
|
|
16
16
|
AZURE_TOKEN_REFRESH_INTERVAL = 600 # seconds
|
|
17
17
|
AZURE_ENDPOINT_DOMAIN_VALID_PATTERN_RE = (
|
|
18
18
|
r"^(?=.{1,255}$)(?!-)[a-zA-Z0-9-]{1,63}(?<!-)"
|