azure-ai-evaluation 1.0.0b1__py3-none-any.whl → 1.0.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +4 -4
- azure/ai/evaluation/_common/rai_service.py +4 -4
- azure/ai/evaluation/_common/utils.py +40 -25
- azure/ai/evaluation/_constants.py +13 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +2 -1
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +39 -17
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +23 -13
- azure/ai/evaluation/_evaluate/_eval_run.py +38 -18
- azure/ai/evaluation/_evaluate/_evaluate.py +88 -63
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +13 -8
- azure/ai/evaluation/_evaluate/_utils.py +29 -22
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +34 -86
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +0 -5
- azure/ai/evaluation/_evaluators/_common/__init__.py +13 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +302 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +79 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +99 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -2
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +9 -4
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +18 -41
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +18 -39
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +18 -39
- azure/ai/evaluation/_evaluators/_eci/_eci.py +18 -55
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +2 -1
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +29 -79
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +0 -5
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +33 -85
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -5
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +18 -65
- azure/ai/evaluation/_evaluators/_qa/_qa.py +3 -14
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +34 -88
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +0 -5
- azure/ai/evaluation/_evaluators/{_chat → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/_retrieval.py +17 -29
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/retrieval.prompty +0 -5
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +3 -2
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +5 -18
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +16 -91
- azure/ai/evaluation/_exceptions.py +0 -1
- azure/ai/evaluation/_http_utils.py +3 -3
- azure/ai/evaluation/_model_configurations.py +36 -8
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/__init__.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +8 -6
- azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
- azure/ai/evaluation/simulator/_conversation/_conversation.py +16 -16
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +6 -6
- azure/ai/evaluation/simulator/_helpers/__init__.py +3 -2
- azure/ai/evaluation/simulator/_helpers/_experimental.py +157 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +11 -29
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +6 -6
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -3
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +18 -11
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
- azure/ai/evaluation/simulator/_model_tools/models.py +9 -11
- azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +2 -1
- azure/ai/evaluation/simulator/{simulator.py → _simulator.py} +166 -88
- azure/ai/evaluation/simulator/_tracing.py +21 -24
- azure/ai/evaluation/simulator/_utils.py +4 -1
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/METADATA +144 -14
- azure_ai_evaluation-1.0.0b3.dist-info/RECORD +98 -0
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -350
- azure/ai/evaluation/_evaluators/_chat/retrieval/__init__.py +0 -9
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -66
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure_ai_evaluation-1.0.0b1.dist-info/RECORD +0 -97
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b3.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_evaluators/_similarity/similarity.prompty CHANGED

```diff
@@ -3,11 +3,6 @@ name: Similarity
 description: Evaluates similarity score for QA scenario
 model:
   api: chat
-  configuration:
-    type: azure_openai
-    azure_deployment: ${env:AZURE_DEPLOYMENT}
-    api_key: ${env:AZURE_OPENAI_API_KEY}
-    azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT}
   parameters:
     temperature: 0.0
     max_tokens: 1
```
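With the `configuration:` block removed, this prompty no longer reads its Azure OpenAI connection from the `AZURE_DEPLOYMENT`/`AZURE_OPENAI_*` environment variables; the connection is supplied in code through the model-configuration TypedDicts defined in `_model_configurations.py` (see that diff below). A minimal usage sketch, assuming the public `SimilarityEvaluator` takes such a configuration at construction time and a `query`/`response`/`ground_truth` call signature; the connection values are placeholders:

```python
import os

from azure.ai.evaluation import SimilarityEvaluator

# Placeholder connection details; in 1.0.0b3 these are passed in code rather than
# being read from the prompty's environment-variable template (assumed usage).
model_config = {
    "type": "azure_openai",
    "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
    "azure_deployment": os.environ["AZURE_DEPLOYMENT"],
    "api_key": os.environ["AZURE_OPENAI_API_KEY"],
}

similarity = SimilarityEvaluator(model_config)
result = similarity(
    query="What is the capital of France?",
    response="Paris is the capital of France.",
    ground_truth="Paris",
)
print(result)
```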
azure/ai/evaluation/_evaluators/_xpia/xpia.py CHANGED

```diff
@@ -2,19 +2,17 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import logging
-from typing import Optional
-
-from promptflow._utils.async_utils import async_run_allowing_running_loop
 
+from typing import Optional
+from typing_extensions import override
 from azure.ai.evaluation._common.constants import EvaluationMetrics
-from azure.ai.evaluation._common
-
-from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
+
 
 logger = logging.getLogger(__name__)
 
 
-class IndirectAttackEvaluator:
+class IndirectAttackEvaluator(RaiServiceEvaluatorBase):
     """A Cross-Domain Prompt Injection Attack (XPIA) jailbreak evaluator.
 
     Detect whether cross domain injected attacks are present in your AI system's response.
@@ -26,7 +24,7 @@ class IndirectAttackEvaluator:
         focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
     :type eval_last_turn: bool
    :param credential: The credential for connecting to Azure AI project.
-    :type credential: ~azure.core.credentials.TokenCredential
+    :type credential: Optional[~azure.core.credentials.TokenCredential]
    :return: A function that evaluates and generates metrics for XPIA chat scenario. Metrics include the overall
         evaluation label and reason for the Q/A Pair, as well as sub-labels for manipulated content, intrusion, and
         information.
@@ -52,89 +50,16 @@
         }
     """
 
-
-
-        self._eval_last_turn = eval_last_turn
-
-    def __call__(
+    @override
+    def __init__(
         self,
-
-
-
-        **kwargs,
+        azure_ai_project: dict,
+        credential: Optional[dict] = None,
+        eval_last_turn: bool = False,
     ):
-
-
-
-
-
-        :paramtype query: Optional[str]
-        :keyword response: The response to be evaluated. Mutually exclusive with 'conversation'.
-        :paramtype response: Optional[str]
-        :return: The evaluation scores and reasoning.
-        :rtype: dict
-        """
-
-        return self._evaluator(query=query, response=response, **kwargs)
-
-
-class _AsyncIndirectAttackEvaluator:
-    def __init__(self, azure_ai_project: dict, credential=None):
-        self._azure_ai_project = azure_ai_project
-        self._credential = credential
-
-    async def __call__(self, *, query: str, response: str, **kwargs):
-        """
-        Evaluates content according to this evaluator's metric.
-        :keyword query: The query to be evaluated.
-        :paramtype query: str
-        :keyword response: The response to be evaluated.
-        :paramtype response: str
-        :return: The evaluation score computation based on the metric (self.metric).
-        :rtype: Any
-        """
-        # Validate inputs
-        # Raises value error if failed, so execution alone signifies success.
-        if not (query and query.strip() and query != "None") or not (
-            response and response.strip() and response != "None"
-        ):
-            msg = "Both 'query' and 'response' must be non-empty strings."
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                error_category=ErrorCategory.MISSING_FIELD,
-                error_blame=ErrorBlame.USER_ERROR,
-                error_target=ErrorTarget.INDIRECT_ATTACK_EVALUATOR,
-            )
-
-        # Run score computation based on supplied metric.
-        result = await evaluate_with_rai_service(
-            metric_name=EvaluationMetrics.XPIA,
-            query=query,
-            response=response,
-            project_scope=self._azure_ai_project,
-            credential=self._credential,
+        super().__init__(
+            eval_metric=EvaluationMetrics.XPIA,
+            azure_ai_project=azure_ai_project,
+            credential=credential,
+            eval_last_turn=eval_last_turn,
         )
-        return result
-
-
-class _IndirectAttackEvaluator:
-    def __init__(self, azure_ai_project: dict, credential=None):
-        self._async_evaluator = _AsyncIndirectAttackEvaluator(azure_ai_project, credential)
-
-    def __call__(self, *, query: str, response: str, **kwargs):
-        """
-        Evaluates XPIA content.
-        :keyword query: The query to be evaluated.
-        :paramtype query: str
-        :keyword response: The response to be evaluated.
-        :paramtype response: str
-        :keyword context: The context to be evaluated.
-        :paramtype context: str
-        :return: The XPIA score.
-        :rtype: dict
-        """
-        return async_run_allowing_running_loop(self._async_evaluator, query=query, response=response, **kwargs)
-
-    def _to_async(self):
-        return self._async_evaluator
```
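The evaluator is now constructed through `RaiServiceEvaluatorBase.__init__` instead of wiring up its own private sync/async wrapper classes. A minimal sketch of the resulting call pattern, based on the `__init__` signature shown above; the project values are placeholders, and both the top-level import path and the `query`/`response` keywords of the inherited `__call__` are assumptions:

```python
from azure.ai.evaluation import IndirectAttackEvaluator
from azure.identity import DefaultAzureCredential

# Placeholder Azure AI project details (shape matches the AzureAIProject TypedDict).
azure_ai_project = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

xpia_evaluator = IndirectAttackEvaluator(
    azure_ai_project=azure_ai_project,
    credential=DefaultAzureCredential(),
    eval_last_turn=False,
)

# The callable interface is now inherited from the shared base class (assumed keywords).
result = xpia_evaluator(
    query="What is the capital of France?",
    response="Paris.",
)
print(result)
```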
azure/ai/evaluation/_exceptions.py CHANGED

```diff
@@ -54,7 +54,6 @@ class ErrorTarget(Enum):
     EVAL_RUN = "EvalRun"
     CODE_CLIENT = "CodeClient"
     RAI_CLIENT = "RAIClient"
-    CHAT_EVALUATOR = "ChatEvaluator"
     COHERENCE_EVALUATOR = "CoherenceEvaluator"
     CONTENT_SAFETY_CHAT_EVALUATOR = "ContentSafetyEvaluator"
     ECI_EVALUATOR = "ECIEvaluator"
```
azure/ai/evaluation/_http_utils.py CHANGED

```diff
@@ -6,6 +6,9 @@
 from functools import wraps
 from typing import Any, Awaitable, Callable, Dict, MutableMapping, Optional
 
+from typing_extensions import Self
+
+from azure.ai.evaluation._user_agent import USER_AGENT
 from azure.core.configuration import Configuration
 from azure.core.pipeline import AsyncPipeline, Pipeline
 from azure.core.pipeline.policies import (
@@ -28,9 +31,6 @@ from azure.core.pipeline.transport import ( # pylint: disable=non-abstract-tran
 )
 from azure.core.rest import AsyncHttpResponse, HttpRequest, HttpResponse
 from azure.core.rest._rest_py3 import ContentType, FilesType, ParamsType
-from typing_extensions import Self
-
-from azure.ai.evaluation._user_agent import USER_AGENT
 
 
 def _request_fn(f: Callable[["HttpPipeline"], None]):
```
azure/ai/evaluation/_model_configurations.py CHANGED

```diff
@@ -2,26 +2,54 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
-from typing import TypedDict
+from typing import Dict, Literal, TypedDict
 
+from typing_extensions import NotRequired
 
-
+
+class AzureOpenAIModelConfiguration(TypedDict, total=False):
+    """Model Configuration for Azure OpenAI Model"""
+
+    type: Literal["azure_openai"]
+    """The type of the model configuration. Should be 'azure_openai' for AzureOpenAIModelConfiguration"""
     azure_deployment: str
+    """Name of Azure OpenAI deployment to make request to"""
     azure_endpoint: str
+    """Endpoint of Azure OpenAI resource to make request to"""
     api_key: str
+    """API key of Azure OpenAI resource"""
+    api_version: NotRequired[str]
+    """(Optional) API version to use in request to Azure OpenAI deployment"""
 
 
-class
-
+class OpenAIModelConfiguration(TypedDict, total=False):
+    """Model Configuration for OpenAI Model"""
 
-
-
+    type: Literal["openai"]
+    """The type of the model configuration. Should be 'openai' for OpenAIModelConfiguration"""
     api_key: str
-
-
+    "API key needed to make request to model"
+    model: str
+    """Name of model to be used in OpenAI request"""
+    base_url: NotRequired[str]
+    """(Optional) Base URL to be used in OpenAI request"""
+    organization: NotRequired[str]
+    """(Optional) OpenAI organization"""
 
 
 class AzureAIProject(TypedDict):
+    """Azure AI Project Information"""
+
     subscription_id: str
+    """Azure subscription id of the project"""
     resource_group_name: str
+    """Azure resource group name of the project"""
     project_name: str
+    """Azure project name"""
+
+
+class EvaluatorConfig(TypedDict, total=False):
+    """Configuration for an evaluator"""
+
+    column_mapping: Dict[str, str]
+    """Dictionary mapping evaluator input name to column in data"""
```
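The configuration TypedDicts now carry a discriminating `type` literal and mark optional fields with `NotRequired`. A short sketch of both shapes as defined above; all endpoint, key, deployment, and model values are placeholders:

```python
from azure.ai.evaluation._model_configurations import (
    AzureOpenAIModelConfiguration,
    OpenAIModelConfiguration,
)

# Azure OpenAI configuration; api_version is marked NotRequired above.
aoai_config: AzureOpenAIModelConfiguration = {
    "type": "azure_openai",
    "azure_endpoint": "https://<resource>.openai.azure.com",
    "azure_deployment": "<deployment-name>",
    "api_key": "<api-key>",
}

# OpenAI configuration; base_url and organization are marked NotRequired above.
oai_config: OpenAIModelConfiguration = {
    "type": "openai",
    "model": "<model-name>",
    "api_key": "<api-key>",
}
```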
azure/ai/evaluation/_version.py CHANGED

azure/ai/evaluation/simulator/__init__.py CHANGED

```diff
@@ -3,7 +3,7 @@ from ._adversarial_simulator import AdversarialSimulator
 from ._constants import SupportedLanguages
 from ._direct_attack_simulator import DirectAttackSimulator
 from ._indirect_attack_simulator import IndirectAttackSimulator
-from .
+from ._simulator import Simulator
 
 __all__ = [
     "AdversarialSimulator",
```
azure/ai/evaluation/simulator/_adversarial_simulator.py CHANGED

```diff
@@ -8,16 +8,17 @@ import logging
 import random
 from typing import Any, Callable, Dict, List, Optional
 
-from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
-from azure.identity import DefaultAzureCredential
 from tqdm import tqdm
 
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_async_http_client
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 from azure.ai.evaluation._model_configurations import AzureAIProject
 from azure.ai.evaluation.simulator import AdversarialScenario
 from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
+from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
+from azure.identity import DefaultAzureCredential
 
+from ._constants import SupportedLanguages
 from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole
 from ._conversation._conversation import simulate_conversation
 from ._model_tools import (
@@ -27,9 +28,7 @@ from ._model_tools import (
     RAIClient,
     TokenScope,
 )
-from ._tracing import monitor_adversarial_scenario
 from ._utils import JsonLineList
-from ._constants import SupportedLanguages
 
 logger = logging.getLogger(__name__)
 
@@ -352,7 +351,10 @@ class AdversarialSimulator:
             )
 
         if role == ConversationRole.ASSISTANT:
-
+
+            def dummy_model() -> None:
+                return None
+
             dummy_model.name = "dummy_model"
             return CallbackConversationBot(
                 callback=target,
```
azure/ai/evaluation/simulator/_conversation/__init__.py CHANGED

```diff
@@ -11,8 +11,8 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import jinja2
 
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import AsyncHttpPipeline
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 
 from .._model_tools import LLMBase, OpenAIChatCompletionsModel
 from .constants import ConversationRole
```
azure/ai/evaluation/simulator/_conversation/_conversation.py CHANGED

```diff
@@ -6,12 +6,12 @@ import asyncio
 import logging
 from typing import Callable, Dict, List, Tuple, Union
 
-from azure.ai.evaluation.
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation.simulator._constants import SupportedLanguages
+from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
 
 from ..._http_utils import AsyncHttpPipeline
 from . import ConversationBot, ConversationTurn
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 
 
 def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
@@ -84,20 +84,20 @@ async def simulate_conversation(
     """
     Simulate a conversation between the given bots.
 
-    :
-    :
-    :
-    :
-    :
-    :
-    :
-    :
-    :
-    :
-    :
-    :
-    :
-    :
+    :keyword bots: List of ConversationBot instances participating in the conversation.
+    :paramtype bots: List[ConversationBot]
+    :keyword session: The session to use for making API calls.
+    :paramtype session: AsyncHttpPipeline
+    :keyword stopping_criteria: A callable that determines when the conversation should stop.
+    :paramtype stopping_criteria: Callable[[str], bool]
+    :keyword turn_limit: The maximum number of turns in the conversation. Defaults to 10.
+    :paramtype turn_limit: int
+    :keyword history_limit: The maximum number of turns to keep in the conversation history. Defaults to 5.
+    :paramtype history_limit: int
+    :keyword api_call_delay_sec: Delay between API calls in seconds. Defaults to 0.
+    :paramtype api_call_delay_sec: float
+    :keyword logger: The logger to use for logging. Defaults to the logger named after the current module.
+    :paramtype logger: logging.Logger
     :return: Simulation a conversation between the given bots.
     :rtype: Tuple
     """
```
azure/ai/evaluation/simulator/_direct_attack_simulator.py CHANGED

```diff
@@ -5,17 +5,17 @@
 import functools
 import logging
 from random import randint
-from typing import
-
-from azure.identity import DefaultAzureCredential
+from typing import Callable, Optional
 
 from promptflow._sdk._telemetry import ActivityType, monitor_operation
-
-from azure.ai.evaluation.
+
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.ai.evaluation.simulator import AdversarialScenario
+from azure.identity import DefaultAzureCredential
 
-from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
 from ._adversarial_simulator import AdversarialSimulator
+from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
 
 logger = logging.getLogger(__name__)
 
```
azure/ai/evaluation/simulator/_helpers/__init__.py CHANGED

```diff
@@ -1,4 +1,5 @@
-from .
+from ._experimental import experimental
 from ._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
+from ._simulator_data_classes import ConversationHistory, Turn
 
-__all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING"]
+__all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING", "experimental"]
```
azure/ai/evaluation/simulator/_helpers/_experimental.py ADDED

```diff
@@ -0,0 +1,157 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import functools
+import inspect
+import logging
+import sys
+from typing import Callable, Type, TypeVar, Union
+
+from typing_extensions import ParamSpec
+
+DOCSTRING_TEMPLATE = ".. note:: {0} {1}\n\n"
+DOCSTRING_DEFAULT_INDENTATION = 8
+EXPERIMENTAL_CLASS_MESSAGE = "This is an experimental class,"
+EXPERIMENTAL_METHOD_MESSAGE = "This is an experimental method,"
+EXPERIMENTAL_FIELD_MESSAGE = "This is an experimental field,"
+EXPERIMENTAL_LINK_MESSAGE = (
+    "and may change at any time. Please see https://aka.ms/azuremlexperimental for more information."
+)
+
+_warning_cache = set()
+module_logger = logging.getLogger(__name__)
+
+TExperimental = TypeVar("TExperimental", bound=Union[Type, Callable])
+P = ParamSpec("P")
+T = TypeVar("T")
+
+
+def experimental(wrapped: TExperimental) -> TExperimental:
+    """Add experimental tag to a class or a method.
+
+    :param wrapped: Either a Class or Function to mark as experimental
+    :type wrapped: TExperimental
+    :return: The wrapped class or method
+    :rtype: TExperimental
+    """
+    if inspect.isclass(wrapped):
+        return _add_class_docstring(wrapped)
+    if inspect.isfunction(wrapped):
+        return _add_method_docstring(wrapped)
+    return wrapped
+
+
+def _add_class_docstring(cls: Type[T]) -> Type[T]:
+    """Add experimental tag to the class doc string.
+
+    :return: The updated class
+    :rtype: Type[T]
+    """
+
+    P2 = ParamSpec("P2")
+
+    def _add_class_warning(func: Callable[P2, None]) -> Callable[P2, None]:
+        """Add warning message for class __init__.
+
+        :param func: The original __init__ function
+        :type func: Callable[P2, None]
+        :return: Updated __init__
+        :rtype: Callable[P2, None]
+        """
+
+        @functools.wraps(func)
+        def wrapped(*args, **kwargs):
+            message = "Class {0}: {1} {2}".format(cls.__name__, EXPERIMENTAL_CLASS_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
+            if not _should_skip_warning() and not _is_warning_cached(message):
+                module_logger.warning(message)
+            return func(*args, **kwargs)
+
+        return wrapped
+
+    doc_string = DOCSTRING_TEMPLATE.format(EXPERIMENTAL_CLASS_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
+    if cls.__doc__:
+        cls.__doc__ = _add_note_to_docstring(cls.__doc__, doc_string)
+    else:
+        cls.__doc__ = doc_string + ">"
+    cls.__init__ = _add_class_warning(cls.__init__)
+    return cls
+
+
+def _add_method_docstring(func: Callable[P, T] = None) -> Callable[P, T]:
+    """Add experimental tag to the method doc string.
+
+    :param func: The function to update
+    :type func: Callable[P, T]
+    :return: A wrapped method marked as experimental
+    :rtype: Callable[P,T]
+    """
+    doc_string = DOCSTRING_TEMPLATE.format(EXPERIMENTAL_METHOD_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
+    if func.__doc__:
+        func.__doc__ = _add_note_to_docstring(func.__doc__, doc_string)
+    else:
+        # '>' is required. Otherwise the note section can't be generated
+        func.__doc__ = doc_string + ">"
+
+    @functools.wraps(func)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> T:
+        message = "Method {0}: {1} {2}".format(func.__name__, EXPERIMENTAL_METHOD_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
+        if not _should_skip_warning() and not _is_warning_cached(message):
+            module_logger.warning(message)
+        return func(*args, **kwargs)
+
+    return wrapped
+
+
+def _add_note_to_docstring(doc_string: str, note: str) -> str:
+    """Adds experimental note to docstring at the top and correctly indents original docstring.
+
+    :param doc_string: The docstring
+    :type doc_string: str
+    :param note: The note to add to the docstring
+    :type note: str
+    :return: Updated docstring
+    :rtype: str
+    """
+    indent = _get_indentation_size(doc_string)
+    doc_string = doc_string.rjust(len(doc_string) + indent)
+    return note + doc_string
+
+
+def _get_indentation_size(doc_string: str) -> int:
+    """Finds the minimum indentation of all non-blank lines after the first line.
+
+    :param doc_string: The docstring
+    :type doc_string: str
+    :return: Minimum number of indentation of the docstring
+    :rtype: int
+    """
+    lines = doc_string.expandtabs().splitlines()
+    indent = sys.maxsize
+    for line in lines[1:]:
+        stripped = line.lstrip()
+        if stripped:
+            indent = min(indent, len(line) - len(stripped))
+    return indent if indent < sys.maxsize else DOCSTRING_DEFAULT_INDENTATION
+
+
+def _should_skip_warning():
+    skip_warning_msg = False
+
+    # Cases where we want to suppress the warning:
+    # 1. When converting from REST object to SDK object
+    for frame in inspect.stack():
+        if frame.function == "_from_rest_object":
+            skip_warning_msg = True
+            break
+
+    return skip_warning_msg
+
+
+def _is_warning_cached(warning_msg):
+    # use cache to make sure we only print same warning message once under same session
+    # this prevents duplicated warnings got printed when user does a loop call on a method or a class
+    if warning_msg in _warning_cache:
+        return True
+    _warning_cache.add(warning_msg)
+    return False
```
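The new `experimental` decorator prepends a `.. note::` banner to the wrapped object's docstring and logs a one-time warning when a decorated class is instantiated (or a decorated function is called). A minimal sketch of that behavior; `MyPreviewFeature` is a made-up class used only for illustration:

```python
import logging

from azure.ai.evaluation.simulator._helpers import experimental

logging.basicConfig(level=logging.WARNING)


@experimental
class MyPreviewFeature:
    """A toy class used only to demonstrate the decorator."""  # hypothetical example class

    def run(self) -> str:
        return "ok"


print(MyPreviewFeature.__doc__)  # now starts with the ".. note::" experimental banner
first = MyPreviewFeature()   # logs "Class MyPreviewFeature: This is an experimental class, ..."
second = MyPreviewFeature()  # the message is cached, so it is not logged a second time
```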
azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py CHANGED

```diff
@@ -3,7 +3,7 @@
 # ---------------------------------------------------------
 # pylint: disable=C0103,C0114,C0116
 from dataclasses import dataclass
-from typing import Union
+from typing import Dict, List, Optional, Union
 
 from azure.ai.evaluation.simulator._conversation.constants import ConversationRole
 
@@ -20,12 +20,12 @@ class Turn:
     content: str
     context: str = None
 
-    def to_dict(self):
+    def to_dict(self) -> Dict[str, Optional[str]]:
         """
         Convert the conversation turn to a dictionary.
 
-
-
+        :returns: A dictionary representation of the conversation turn.
+        :rtype: Dict[str, Optional[str]]
         """
         return {
             "role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
@@ -34,12 +34,6 @@ class Turn:
         }
 
     def __repr__(self):
-        """
-        Return the string representation of the conversation turn.
-
-        Returns:
-            str: A string representation of the conversation turn.
-        """
         return f"Turn(role={self.role}, content={self.content})"
 
 
@@ -52,42 +46,30 @@ class ConversationHistory:
         """
         Initializes the conversation history with an empty list of turns.
         """
-        self.history = []
+        self.history: List[Turn] = []
 
     def add_to_history(self, turn: Turn):
         """
         Adds a turn to the conversation history.
 
-
-
+        :param turn: The conversation turn to add.
+        :type turn: Turn
         """
         self.history.append(turn)
 
-    def to_list(self):
+    def to_list(self) -> List[Dict[str, str]]:
         """
         Converts the conversation history to a list of dictionaries.
 
-
-
+        :returns: A list of dictionaries representing the conversation turns.
+        :rtype: List[Dict[str, str]]
         """
         return [turn.to_dict() for turn in self.history]
 
-    def
-        """
-        Returns the length of the conversation.
-
-        Returns:
-            int: The number of turns in the conversation history.
-        """
+    def __len__(self) -> int:
         return len(self.history)
 
     def __repr__(self):
-        """
-        Returns the string representation of the conversation history.
-
-        Returns:
-            str: A string representation of the conversation history.
-        """
         for turn in self.history:
             print(turn)
         return ""
```
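`Turn` and `ConversationHistory` now carry explicit type hints and Sphinx-style docstrings. A small sketch of how these helpers fit together, using the imports shown in the diff above and assuming the `USER`/`ASSISTANT` members of `ConversationRole`:

```python
from azure.ai.evaluation.simulator._conversation.constants import ConversationRole
from azure.ai.evaluation.simulator._helpers import ConversationHistory, Turn

history = ConversationHistory()
history.add_to_history(Turn(role=ConversationRole.USER, content="Hello"))
history.add_to_history(Turn(role=ConversationRole.ASSISTANT, content="Hi, how can I help?"))

print(len(history))       # 2, via the new __len__
print(history.to_list())  # list of per-turn dictionaries produced by Turn.to_dict()
```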
azure/ai/evaluation/simulator/_indirect_attack_simulator.py CHANGED

```diff
@@ -4,17 +4,17 @@
 # noqa: E501
 import functools
 import logging
-from typing import
-
-from azure.identity import DefaultAzureCredential
+from typing import Callable
 
 from promptflow._sdk._telemetry import ActivityType, monitor_operation
-
+
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._model_configurations import AzureAIProject
+from azure.ai.evaluation.simulator import AdversarialScenario
+from azure.identity import DefaultAzureCredential
 
-from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
 from ._adversarial_simulator import AdversarialSimulator
+from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
 
 logger = logging.getLogger(__name__)
 
```