azure-ai-evaluation 1.0.0b1__py3-none-any.whl → 1.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +1 -5
- azure/ai/evaluation/_common/rai_service.py +4 -4
- azure/ai/evaluation/_common/utils.py +19 -19
- azure/ai/evaluation/_constants.py +9 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py +2 -1
- azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py +39 -17
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +23 -13
- azure/ai/evaluation/_evaluate/_eval_run.py +38 -18
- azure/ai/evaluation/_evaluate/_evaluate.py +35 -28
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +13 -8
- azure/ai/evaluation/_evaluate/_utils.py +29 -22
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +1 -1
- azure/ai/evaluation/_evaluators/_chat/_chat.py +16 -9
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +4 -10
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +5 -10
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +0 -2
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +1 -2
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +9 -4
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +1 -1
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +1 -1
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +1 -1
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +1 -1
- azure/ai/evaluation/_evaluators/_eci/_eci.py +2 -2
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +2 -1
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +5 -10
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +1 -1
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +5 -10
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +1 -0
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +2 -2
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +2 -2
- azure/ai/evaluation/_evaluators/_qa/_qa.py +3 -14
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +5 -10
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +3 -2
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +5 -10
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +1 -2
- azure/ai/evaluation/_http_utils.py +3 -3
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/simulator/__init__.py +1 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +8 -6
- azure/ai/evaluation/simulator/_conversation/__init__.py +1 -1
- azure/ai/evaluation/simulator/_conversation/_conversation.py +16 -16
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +6 -6
- azure/ai/evaluation/simulator/_helpers/__init__.py +3 -2
- azure/ai/evaluation/simulator/_helpers/_experimental.py +157 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +11 -29
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +6 -6
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +2 -3
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +18 -11
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +1 -1
- azure/ai/evaluation/simulator/_model_tools/models.py +9 -11
- azure/ai/evaluation/simulator/_prompty/__init__.py +0 -0
- azure/ai/evaluation/simulator/{simulator.py → _simulator.py} +147 -80
- azure/ai/evaluation/simulator/_tracing.py +21 -24
- azure/ai/evaluation/simulator/_utils.py +4 -1
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/METADATA +86 -14
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/RECORD +58 -56
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/WHEEL +0 -0
- {azure_ai_evaluation-1.0.0b1.dist-info → azure_ai_evaluation-1.0.0b2.dist-info}/top_level.txt +0 -0
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
|
|
5
5
|
from concurrent.futures import as_completed
|
|
6
|
-
from typing import Union
|
|
7
6
|
|
|
8
7
|
from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
|
|
9
8
|
|
|
@@ -11,7 +10,6 @@ from .._coherence import CoherenceEvaluator
|
|
|
11
10
|
from .._f1_score import F1ScoreEvaluator
|
|
12
11
|
from .._fluency import FluencyEvaluator
|
|
13
12
|
from .._groundedness import GroundednessEvaluator
|
|
14
|
-
from ..._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
|
|
15
13
|
from .._relevance import RelevanceEvaluator
|
|
16
14
|
from .._similarity import SimilarityEvaluator
|
|
17
15
|
|
|
@@ -52,9 +50,7 @@ class QAEvaluator:
|
|
|
52
50
|
}
|
|
53
51
|
"""
|
|
54
52
|
|
|
55
|
-
def __init__(
|
|
56
|
-
self, model_config: dict, parallel: bool = True
|
|
57
|
-
):
|
|
53
|
+
def __init__(self, model_config: dict, parallel: bool = True):
|
|
58
54
|
self._parallel = parallel
|
|
59
55
|
|
|
60
56
|
self._evaluators = [
|
|
@@ -88,12 +84,7 @@ class QAEvaluator:
|
|
|
88
84
|
with ThreadPoolExecutor() as executor:
|
|
89
85
|
futures = {
|
|
90
86
|
executor.submit(
|
|
91
|
-
evaluator,
|
|
92
|
-
query=query,
|
|
93
|
-
response=response,
|
|
94
|
-
context=context,
|
|
95
|
-
ground_truth=ground_truth,
|
|
96
|
-
**kwargs
|
|
87
|
+
evaluator, query=query, response=response, context=context, ground_truth=ground_truth, **kwargs
|
|
97
88
|
): evaluator
|
|
98
89
|
for evaluator in self._evaluators
|
|
99
90
|
}
|
|
@@ -103,9 +94,7 @@ class QAEvaluator:
|
|
|
103
94
|
results.update(future.result())
|
|
104
95
|
else:
|
|
105
96
|
for evaluator in self._evaluators:
|
|
106
|
-
result = evaluator(
|
|
107
|
-
query=query, response=response, context=context, ground_truth=ground_truth, **kwargs
|
|
108
|
-
)
|
|
97
|
+
result = evaluator(query=query, response=response, context=context, ground_truth=ground_truth, **kwargs)
|
|
109
98
|
results.update(result)
|
|
110
99
|
|
|
111
100
|
return results
|
|
@@ -4,19 +4,14 @@
|
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
6
|
import re
|
|
7
|
-
from typing import Union
|
|
8
7
|
|
|
9
8
|
import numpy as np
|
|
10
|
-
|
|
11
9
|
from promptflow._utils.async_utils import async_run_allowing_running_loop
|
|
12
|
-
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
|
|
13
10
|
from promptflow.core import AsyncPrompty
|
|
14
11
|
|
|
15
|
-
from
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
check_and_add_user_agent_for_aoai_model_config,
|
|
19
|
-
)
|
|
12
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
13
|
+
|
|
14
|
+
from ..._common.utils import ensure_api_version_in_aoai_model_config, ensure_user_agent_in_aoai_model_config
|
|
20
15
|
|
|
21
16
|
try:
|
|
22
17
|
from ..._user_agent import USER_AGENT
|
|
@@ -31,7 +26,7 @@ class _AsyncRelevanceEvaluator:
|
|
|
31
26
|
DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
|
|
32
27
|
|
|
33
28
|
def __init__(self, model_config: dict):
|
|
34
|
-
|
|
29
|
+
ensure_api_version_in_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
|
|
35
30
|
|
|
36
31
|
prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
|
|
37
32
|
|
|
@@ -39,7 +34,7 @@ class _AsyncRelevanceEvaluator:
|
|
|
39
34
|
# https://github.com/encode/httpx/discussions/2959
|
|
40
35
|
prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})
|
|
41
36
|
|
|
42
|
-
|
|
37
|
+
ensure_user_agent_in_aoai_model_config(
|
|
43
38
|
model_config,
|
|
44
39
|
prompty_model_config,
|
|
45
40
|
USER_AGENT,
|
|
@@ -3,12 +3,13 @@
|
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
from enum import Enum
|
|
5
5
|
|
|
6
|
+
from promptflow._utils.async_utils import async_run_allowing_running_loop
|
|
6
7
|
from rouge_score import rouge_scorer
|
|
7
8
|
|
|
8
|
-
from
|
|
9
|
+
from azure.core import CaseInsensitiveEnumMeta
|
|
9
10
|
|
|
10
11
|
|
|
11
|
-
class RougeType(str, Enum):
|
|
12
|
+
class RougeType(str, Enum, metaclass=CaseInsensitiveEnumMeta):
|
|
12
13
|
"""
|
|
13
14
|
Enumeration of ROUGE (Recall-Oriented Understudy for Gisting Evaluation) types.
|
|
14
15
|
"""
|
|
@@ -4,19 +4,14 @@
|
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
6
|
import re
|
|
7
|
-
from typing import Union
|
|
8
7
|
|
|
9
8
|
import numpy as np
|
|
10
|
-
|
|
11
9
|
from promptflow._utils.async_utils import async_run_allowing_running_loop
|
|
12
|
-
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
|
|
13
10
|
from promptflow.core import AsyncPrompty
|
|
14
11
|
|
|
15
|
-
from
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
check_and_add_user_agent_for_aoai_model_config,
|
|
19
|
-
)
|
|
12
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
13
|
+
|
|
14
|
+
from ..._common.utils import ensure_api_version_in_aoai_model_config, ensure_user_agent_in_aoai_model_config
|
|
20
15
|
|
|
21
16
|
try:
|
|
22
17
|
from ..._user_agent import USER_AGENT
|
|
@@ -31,7 +26,7 @@ class _AsyncSimilarityEvaluator:
|
|
|
31
26
|
DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
|
|
32
27
|
|
|
33
28
|
def __init__(self, model_config: dict):
|
|
34
|
-
|
|
29
|
+
ensure_api_version_in_aoai_model_config(model_config, self.DEFAULT_OPEN_API_VERSION)
|
|
35
30
|
|
|
36
31
|
prompty_model_config = {"configuration": model_config, "parameters": {"extra_headers": {}}}
|
|
37
32
|
|
|
@@ -39,7 +34,7 @@ class _AsyncSimilarityEvaluator:
|
|
|
39
34
|
# https://github.com/encode/httpx/discussions/2959
|
|
40
35
|
prompty_model_config["parameters"]["extra_headers"].update({"Connection": "close"})
|
|
41
36
|
|
|
42
|
-
|
|
37
|
+
ensure_user_agent_in_aoai_model_config(
|
|
43
38
|
model_config,
|
|
44
39
|
prompty_model_config,
|
|
45
40
|
USER_AGENT,
|
|
@@ -8,8 +8,7 @@ from promptflow._utils.async_utils import async_run_allowing_running_loop
|
|
|
8
8
|
|
|
9
9
|
from azure.ai.evaluation._common.constants import EvaluationMetrics
|
|
10
10
|
from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
|
|
11
|
-
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
|
|
12
|
-
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
11
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
13
12
|
|
|
14
13
|
logger = logging.getLogger(__name__)
|
|
15
14
|
|
|
@@ -6,6 +6,9 @@
|
|
|
6
6
|
from functools import wraps
|
|
7
7
|
from typing import Any, Awaitable, Callable, Dict, MutableMapping, Optional
|
|
8
8
|
|
|
9
|
+
from typing_extensions import Self
|
|
10
|
+
|
|
11
|
+
from azure.ai.evaluation._user_agent import USER_AGENT
|
|
9
12
|
from azure.core.configuration import Configuration
|
|
10
13
|
from azure.core.pipeline import AsyncPipeline, Pipeline
|
|
11
14
|
from azure.core.pipeline.policies import (
|
|
@@ -28,9 +31,6 @@ from azure.core.pipeline.transport import ( # pylint: disable=non-abstract-tran
|
|
|
28
31
|
)
|
|
29
32
|
from azure.core.rest import AsyncHttpResponse, HttpRequest, HttpResponse
|
|
30
33
|
from azure.core.rest._rest_py3 import ContentType, FilesType, ParamsType
|
|
31
|
-
from typing_extensions import Self
|
|
32
|
-
|
|
33
|
-
from azure.ai.evaluation._user_agent import USER_AGENT
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
def _request_fn(f: Callable[["HttpPipeline"], None]):
|
azure/ai/evaluation/_version.py
CHANGED
|
@@ -3,7 +3,7 @@ from ._adversarial_simulator import AdversarialSimulator
|
|
|
3
3
|
from ._constants import SupportedLanguages
|
|
4
4
|
from ._direct_attack_simulator import DirectAttackSimulator
|
|
5
5
|
from ._indirect_attack_simulator import IndirectAttackSimulator
|
|
6
|
-
from .simulator import Simulator
|
|
6
|
+
from ._simulator import Simulator
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
9
9
|
"AdversarialSimulator",
|
|
@@ -8,16 +8,17 @@ import logging
|
|
|
8
8
|
import random
|
|
9
9
|
from typing import Any, Callable, Dict, List, Optional
|
|
10
10
|
|
|
11
|
-
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
12
|
-
from azure.identity import DefaultAzureCredential
|
|
13
11
|
from tqdm import tqdm
|
|
14
12
|
|
|
13
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
15
14
|
from azure.ai.evaluation._http_utils import get_async_http_client
|
|
16
|
-
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
|
|
17
15
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
18
16
|
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
19
17
|
from azure.ai.evaluation.simulator._adversarial_scenario import _UnstableAdversarialScenario
|
|
18
|
+
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
19
|
+
from azure.identity import DefaultAzureCredential
|
|
20
20
|
|
|
21
|
+
from ._constants import SupportedLanguages
|
|
21
22
|
from ._conversation import CallbackConversationBot, ConversationBot, ConversationRole
|
|
22
23
|
from ._conversation._conversation import simulate_conversation
|
|
23
24
|
from ._model_tools import (
|
|
@@ -27,9 +28,7 @@ from ._model_tools import (
|
|
|
27
28
|
RAIClient,
|
|
28
29
|
TokenScope,
|
|
29
30
|
)
|
|
30
|
-
from ._tracing import monitor_adversarial_scenario
|
|
31
31
|
from ._utils import JsonLineList
|
|
32
|
-
from ._constants import SupportedLanguages
|
|
33
32
|
|
|
34
33
|
logger = logging.getLogger(__name__)
|
|
35
34
|
|
|
@@ -352,7 +351,10 @@ class AdversarialSimulator:
|
|
|
352
351
|
)
|
|
353
352
|
|
|
354
353
|
if role == ConversationRole.ASSISTANT:
|
|
355
|
-
|
|
354
|
+
|
|
355
|
+
def dummy_model() -> None:
|
|
356
|
+
return None
|
|
357
|
+
|
|
356
358
|
dummy_model.name = "dummy_model"
|
|
357
359
|
return CallbackConversationBot(
|
|
358
360
|
callback=target,
|
|
@@ -11,8 +11,8 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
|
11
11
|
|
|
12
12
|
import jinja2
|
|
13
13
|
|
|
14
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
14
15
|
from azure.ai.evaluation._http_utils import AsyncHttpPipeline
|
|
15
|
-
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
|
|
16
16
|
|
|
17
17
|
from .._model_tools import LLMBase, OpenAIChatCompletionsModel
|
|
18
18
|
from .constants import ConversationRole
|
|
@@ -6,12 +6,12 @@ import asyncio
|
|
|
6
6
|
import logging
|
|
7
7
|
from typing import Callable, Dict, List, Tuple, Union
|
|
8
8
|
|
|
9
|
-
from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
|
|
9
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
10
10
|
from azure.ai.evaluation.simulator._constants import SupportedLanguages
|
|
11
|
+
from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
|
|
11
12
|
|
|
12
13
|
from ..._http_utils import AsyncHttpPipeline
|
|
13
14
|
from . import ConversationBot, ConversationTurn
|
|
14
|
-
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
|
|
@@ -84,20 +84,20 @@ async def simulate_conversation(
|
|
|
84
84
|
"""
|
|
85
85
|
Simulate a conversation between the given bots.
|
|
86
86
|
|
|
87
|
-
:
|
|
88
|
-
:
|
|
89
|
-
:
|
|
90
|
-
:
|
|
91
|
-
:
|
|
92
|
-
:
|
|
93
|
-
:
|
|
94
|
-
:
|
|
95
|
-
:
|
|
96
|
-
:
|
|
97
|
-
:
|
|
98
|
-
:
|
|
99
|
-
:
|
|
100
|
-
:
|
|
87
|
+
:keyword bots: List of ConversationBot instances participating in the conversation.
|
|
88
|
+
:paramtype bots: List[ConversationBot]
|
|
89
|
+
:keyword session: The session to use for making API calls.
|
|
90
|
+
:paramtype session: AsyncHttpPipeline
|
|
91
|
+
:keyword stopping_criteria: A callable that determines when the conversation should stop.
|
|
92
|
+
:paramtype stopping_criteria: Callable[[str], bool]
|
|
93
|
+
:keyword turn_limit: The maximum number of turns in the conversation. Defaults to 10.
|
|
94
|
+
:paramtype turn_limit: int
|
|
95
|
+
:keyword history_limit: The maximum number of turns to keep in the conversation history. Defaults to 5.
|
|
96
|
+
:paramtype history_limit: int
|
|
97
|
+
:keyword api_call_delay_sec: Delay between API calls in seconds. Defaults to 0.
|
|
98
|
+
:paramtype api_call_delay_sec: float
|
|
99
|
+
:keyword logger: The logger to use for logging. Defaults to the logger named after the current module.
|
|
100
|
+
:paramtype logger: logging.Logger
|
|
101
101
|
:return: Simulation a conversation between the given bots.
|
|
102
102
|
:rtype: Tuple
|
|
103
103
|
"""
|
|
@@ -5,17 +5,17 @@
|
|
|
5
5
|
import functools
|
|
6
6
|
import logging
|
|
7
7
|
from random import randint
|
|
8
|
-
from typing import
|
|
9
|
-
|
|
10
|
-
from azure.identity import DefaultAzureCredential
|
|
8
|
+
from typing import Callable, Optional
|
|
11
9
|
|
|
12
10
|
from promptflow._sdk._telemetry import ActivityType, monitor_operation
|
|
13
|
-
|
|
14
|
-
from azure.ai.evaluation.
|
|
11
|
+
|
|
12
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
15
13
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
14
|
+
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
15
|
+
from azure.identity import DefaultAzureCredential
|
|
16
16
|
|
|
17
|
-
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
|
|
18
17
|
from ._adversarial_simulator import AdversarialSimulator
|
|
18
|
+
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from ._simulator_data_classes import ConversationHistory, Turn
|
|
1
|
+
from ._experimental import experimental
|
|
2
2
|
from ._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
|
|
3
|
+
from ._simulator_data_classes import ConversationHistory, Turn
|
|
3
4
|
|
|
4
|
-
__all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING"]
|
|
5
|
+
__all__ = ["ConversationHistory", "Turn", "SUPPORTED_LANGUAGES_MAPPING", "experimental"]
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
import inspect
|
|
7
|
+
import logging
|
|
8
|
+
import sys
|
|
9
|
+
from typing import Callable, Type, TypeVar, Union
|
|
10
|
+
|
|
11
|
+
from typing_extensions import ParamSpec
|
|
12
|
+
|
|
13
|
+
DOCSTRING_TEMPLATE = ".. note:: {0} {1}\n\n"
|
|
14
|
+
DOCSTRING_DEFAULT_INDENTATION = 8
|
|
15
|
+
EXPERIMENTAL_CLASS_MESSAGE = "This is an experimental class,"
|
|
16
|
+
EXPERIMENTAL_METHOD_MESSAGE = "This is an experimental method,"
|
|
17
|
+
EXPERIMENTAL_FIELD_MESSAGE = "This is an experimental field,"
|
|
18
|
+
EXPERIMENTAL_LINK_MESSAGE = (
|
|
19
|
+
"and may change at any time. Please see https://aka.ms/azuremlexperimental for more information."
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
_warning_cache = set()
|
|
23
|
+
module_logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
TExperimental = TypeVar("TExperimental", bound=Union[Type, Callable])
|
|
26
|
+
P = ParamSpec("P")
|
|
27
|
+
T = TypeVar("T")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def experimental(wrapped: TExperimental) -> TExperimental:
|
|
31
|
+
"""Add experimental tag to a class or a method.
|
|
32
|
+
|
|
33
|
+
:param wrapped: Either a Class or Function to mark as experimental
|
|
34
|
+
:type wrapped: TExperimental
|
|
35
|
+
:return: The wrapped class or method
|
|
36
|
+
:rtype: TExperimental
|
|
37
|
+
"""
|
|
38
|
+
if inspect.isclass(wrapped):
|
|
39
|
+
return _add_class_docstring(wrapped)
|
|
40
|
+
if inspect.isfunction(wrapped):
|
|
41
|
+
return _add_method_docstring(wrapped)
|
|
42
|
+
return wrapped
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _add_class_docstring(cls: Type[T]) -> Type[T]:
|
|
46
|
+
"""Add experimental tag to the class doc string.
|
|
47
|
+
|
|
48
|
+
:return: The updated class
|
|
49
|
+
:rtype: Type[T]
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
P2 = ParamSpec("P2")
|
|
53
|
+
|
|
54
|
+
def _add_class_warning(func: Callable[P2, None]) -> Callable[P2, None]:
|
|
55
|
+
"""Add warning message for class __init__.
|
|
56
|
+
|
|
57
|
+
:param func: The original __init__ function
|
|
58
|
+
:type func: Callable[P2, None]
|
|
59
|
+
:return: Updated __init__
|
|
60
|
+
:rtype: Callable[P2, None]
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
@functools.wraps(func)
|
|
64
|
+
def wrapped(*args, **kwargs):
|
|
65
|
+
message = "Class {0}: {1} {2}".format(cls.__name__, EXPERIMENTAL_CLASS_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
|
|
66
|
+
if not _should_skip_warning() and not _is_warning_cached(message):
|
|
67
|
+
module_logger.warning(message)
|
|
68
|
+
return func(*args, **kwargs)
|
|
69
|
+
|
|
70
|
+
return wrapped
|
|
71
|
+
|
|
72
|
+
doc_string = DOCSTRING_TEMPLATE.format(EXPERIMENTAL_CLASS_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
|
|
73
|
+
if cls.__doc__:
|
|
74
|
+
cls.__doc__ = _add_note_to_docstring(cls.__doc__, doc_string)
|
|
75
|
+
else:
|
|
76
|
+
cls.__doc__ = doc_string + ">"
|
|
77
|
+
cls.__init__ = _add_class_warning(cls.__init__)
|
|
78
|
+
return cls
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _add_method_docstring(func: Callable[P, T] = None) -> Callable[P, T]:
|
|
82
|
+
"""Add experimental tag to the method doc string.
|
|
83
|
+
|
|
84
|
+
:param func: The function to update
|
|
85
|
+
:type func: Callable[P, T]
|
|
86
|
+
:return: A wrapped method marked as experimental
|
|
87
|
+
:rtype: Callable[P,T]
|
|
88
|
+
"""
|
|
89
|
+
doc_string = DOCSTRING_TEMPLATE.format(EXPERIMENTAL_METHOD_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
|
|
90
|
+
if func.__doc__:
|
|
91
|
+
func.__doc__ = _add_note_to_docstring(func.__doc__, doc_string)
|
|
92
|
+
else:
|
|
93
|
+
# '>' is required. Otherwise the note section can't be generated
|
|
94
|
+
func.__doc__ = doc_string + ">"
|
|
95
|
+
|
|
96
|
+
@functools.wraps(func)
|
|
97
|
+
def wrapped(*args: P.args, **kwargs: P.kwargs) -> T:
|
|
98
|
+
message = "Method {0}: {1} {2}".format(func.__name__, EXPERIMENTAL_METHOD_MESSAGE, EXPERIMENTAL_LINK_MESSAGE)
|
|
99
|
+
if not _should_skip_warning() and not _is_warning_cached(message):
|
|
100
|
+
module_logger.warning(message)
|
|
101
|
+
return func(*args, **kwargs)
|
|
102
|
+
|
|
103
|
+
return wrapped
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _add_note_to_docstring(doc_string: str, note: str) -> str:
|
|
107
|
+
"""Adds experimental note to docstring at the top and correctly indents original docstring.
|
|
108
|
+
|
|
109
|
+
:param doc_string: The docstring
|
|
110
|
+
:type doc_string: str
|
|
111
|
+
:param note: The note to add to the docstring
|
|
112
|
+
:type note: str
|
|
113
|
+
:return: Updated docstring
|
|
114
|
+
:rtype: str
|
|
115
|
+
"""
|
|
116
|
+
indent = _get_indentation_size(doc_string)
|
|
117
|
+
doc_string = doc_string.rjust(len(doc_string) + indent)
|
|
118
|
+
return note + doc_string
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _get_indentation_size(doc_string: str) -> int:
|
|
122
|
+
"""Finds the minimum indentation of all non-blank lines after the first line.
|
|
123
|
+
|
|
124
|
+
:param doc_string: The docstring
|
|
125
|
+
:type doc_string: str
|
|
126
|
+
:return: Minimum number of indentation of the docstring
|
|
127
|
+
:rtype: int
|
|
128
|
+
"""
|
|
129
|
+
lines = doc_string.expandtabs().splitlines()
|
|
130
|
+
indent = sys.maxsize
|
|
131
|
+
for line in lines[1:]:
|
|
132
|
+
stripped = line.lstrip()
|
|
133
|
+
if stripped:
|
|
134
|
+
indent = min(indent, len(line) - len(stripped))
|
|
135
|
+
return indent if indent < sys.maxsize else DOCSTRING_DEFAULT_INDENTATION
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _should_skip_warning():
|
|
139
|
+
skip_warning_msg = False
|
|
140
|
+
|
|
141
|
+
# Cases where we want to suppress the warning:
|
|
142
|
+
# 1. When converting from REST object to SDK object
|
|
143
|
+
for frame in inspect.stack():
|
|
144
|
+
if frame.function == "_from_rest_object":
|
|
145
|
+
skip_warning_msg = True
|
|
146
|
+
break
|
|
147
|
+
|
|
148
|
+
return skip_warning_msg
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _is_warning_cached(warning_msg):
|
|
152
|
+
# use cache to make sure we only print same warning message once under same session
|
|
153
|
+
# this prevents duplicated warnings got printed when user does a loop call on a method or a class
|
|
154
|
+
if warning_msg in _warning_cache:
|
|
155
|
+
return True
|
|
156
|
+
_warning_cache.add(warning_msg)
|
|
157
|
+
return False
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# ---------------------------------------------------------
|
|
4
4
|
# pylint: disable=C0103,C0114,C0116
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from typing import Union
|
|
6
|
+
from typing import Dict, List, Optional, Union
|
|
7
7
|
|
|
8
8
|
from azure.ai.evaluation.simulator._conversation.constants import ConversationRole
|
|
9
9
|
|
|
@@ -20,12 +20,12 @@ class Turn:
|
|
|
20
20
|
content: str
|
|
21
21
|
context: str = None
|
|
22
22
|
|
|
23
|
-
def to_dict(self):
|
|
23
|
+
def to_dict(self) -> Dict[str, Optional[str]]:
|
|
24
24
|
"""
|
|
25
25
|
Convert the conversation turn to a dictionary.
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
:returns: A dictionary representation of the conversation turn.
|
|
28
|
+
:rtype: Dict[str, Optional[str]]
|
|
29
29
|
"""
|
|
30
30
|
return {
|
|
31
31
|
"role": self.role.value if isinstance(self.role, ConversationRole) else self.role,
|
|
@@ -34,12 +34,6 @@ class Turn:
|
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
def __repr__(self):
|
|
37
|
-
"""
|
|
38
|
-
Return the string representation of the conversation turn.
|
|
39
|
-
|
|
40
|
-
Returns:
|
|
41
|
-
str: A string representation of the conversation turn.
|
|
42
|
-
"""
|
|
43
37
|
return f"Turn(role={self.role}, content={self.content})"
|
|
44
38
|
|
|
45
39
|
|
|
@@ -52,42 +46,30 @@ class ConversationHistory:
|
|
|
52
46
|
"""
|
|
53
47
|
Initializes the conversation history with an empty list of turns.
|
|
54
48
|
"""
|
|
55
|
-
self.history = []
|
|
49
|
+
self.history: List[Turn] = []
|
|
56
50
|
|
|
57
51
|
def add_to_history(self, turn: Turn):
|
|
58
52
|
"""
|
|
59
53
|
Adds a turn to the conversation history.
|
|
60
54
|
|
|
61
|
-
|
|
62
|
-
|
|
55
|
+
:param turn: The conversation turn to add.
|
|
56
|
+
:type turn: Turn
|
|
63
57
|
"""
|
|
64
58
|
self.history.append(turn)
|
|
65
59
|
|
|
66
|
-
def to_list(self):
|
|
60
|
+
def to_list(self) -> List[Dict[str, str]]:
|
|
67
61
|
"""
|
|
68
62
|
Converts the conversation history to a list of dictionaries.
|
|
69
63
|
|
|
70
|
-
|
|
71
|
-
|
|
64
|
+
:returns: A list of dictionaries representing the conversation turns.
|
|
65
|
+
:rtype: List[Dict[str, str]]
|
|
72
66
|
"""
|
|
73
67
|
return [turn.to_dict() for turn in self.history]
|
|
74
68
|
|
|
75
|
-
def
|
|
76
|
-
"""
|
|
77
|
-
Returns the length of the conversation.
|
|
78
|
-
|
|
79
|
-
Returns:
|
|
80
|
-
int: The number of turns in the conversation history.
|
|
81
|
-
"""
|
|
69
|
+
def __len__(self) -> int:
|
|
82
70
|
return len(self.history)
|
|
83
71
|
|
|
84
72
|
def __repr__(self):
|
|
85
|
-
"""
|
|
86
|
-
Returns the string representation of the conversation history.
|
|
87
|
-
|
|
88
|
-
Returns:
|
|
89
|
-
str: A string representation of the conversation history.
|
|
90
|
-
"""
|
|
91
73
|
for turn in self.history:
|
|
92
74
|
print(turn)
|
|
93
75
|
return ""
|
|
@@ -4,17 +4,17 @@
|
|
|
4
4
|
# noqa: E501
|
|
5
5
|
import functools
|
|
6
6
|
import logging
|
|
7
|
-
from typing import
|
|
8
|
-
|
|
9
|
-
from azure.identity import DefaultAzureCredential
|
|
7
|
+
from typing import Callable
|
|
10
8
|
|
|
11
9
|
from promptflow._sdk._telemetry import ActivityType, monitor_operation
|
|
12
|
-
|
|
10
|
+
|
|
11
|
+
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
13
12
|
from azure.ai.evaluation._model_configurations import AzureAIProject
|
|
13
|
+
from azure.ai.evaluation.simulator import AdversarialScenario
|
|
14
|
+
from azure.identity import DefaultAzureCredential
|
|
14
15
|
|
|
15
|
-
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
|
|
16
|
-
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
|
|
17
16
|
from ._adversarial_simulator import AdversarialSimulator
|
|
17
|
+
from ._model_tools import AdversarialTemplateHandler, ManagedIdentityAPITokenManager, RAIClient, TokenScope
|
|
18
18
|
|
|
19
19
|
logger = logging.getLogger(__name__)
|
|
20
20
|
|
|
@@ -8,11 +8,10 @@ import time
|
|
|
8
8
|
import uuid
|
|
9
9
|
from typing import Dict, List
|
|
10
10
|
|
|
11
|
-
from azure.core.exceptions import HttpResponseError
|
|
12
|
-
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
13
|
-
|
|
14
11
|
from azure.ai.evaluation._http_utils import AsyncHttpPipeline, get_async_http_client
|
|
15
12
|
from azure.ai.evaluation._user_agent import USER_AGENT
|
|
13
|
+
from azure.core.exceptions import HttpResponseError
|
|
14
|
+
from azure.core.pipeline.policies import AsyncRetryPolicy, RetryMode
|
|
16
15
|
|
|
17
16
|
from .models import OpenAIChatCompletionsModel
|
|
18
17
|
|