azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
|
@@ -7,14 +7,16 @@ import copy
|
|
|
7
7
|
import logging
|
|
8
8
|
import time
|
|
9
9
|
from dataclasses import dataclass
|
|
10
|
-
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
11
|
-
|
|
10
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
|
|
11
|
+
import base64
|
|
12
|
+
import re
|
|
12
13
|
import jinja2
|
|
13
14
|
|
|
14
15
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
15
16
|
from azure.ai.evaluation._http_utils import AsyncHttpPipeline
|
|
16
|
-
|
|
17
|
-
from
|
|
17
|
+
from .._model_tools import LLMBase, OpenAIChatCompletionsModel, RAIClient
|
|
18
|
+
from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
|
|
19
|
+
from .._model_tools._template_handler import TemplateParameters
|
|
18
20
|
from .constants import ConversationRole
|
|
19
21
|
|
|
20
22
|
|
|
@@ -40,7 +42,7 @@ class ConversationTurn:
|
|
|
40
42
|
role: "ConversationRole"
|
|
41
43
|
name: Optional[str] = None
|
|
42
44
|
message: str = ""
|
|
43
|
-
full_response: Optional[Any] = None
|
|
45
|
+
full_response: Optional[Dict[str, Any]] = None
|
|
44
46
|
request: Optional[Any] = None
|
|
45
47
|
|
|
46
48
|
def to_openai_chat_format(self, reverse: bool = False) -> Dict[str, str]:
|
|
@@ -109,7 +111,7 @@ class ConversationBot:
|
|
|
109
111
|
role: ConversationRole,
|
|
110
112
|
model: Union[LLMBase, OpenAIChatCompletionsModel],
|
|
111
113
|
conversation_template: str,
|
|
112
|
-
instantiation_parameters:
|
|
114
|
+
instantiation_parameters: TemplateParameters,
|
|
113
115
|
) -> None:
|
|
114
116
|
self.role = role
|
|
115
117
|
self.conversation_template_orig = conversation_template
|
|
@@ -118,13 +120,13 @@ class ConversationBot:
|
|
|
118
120
|
)
|
|
119
121
|
self.persona_template_args = instantiation_parameters
|
|
120
122
|
if self.role == ConversationRole.USER:
|
|
121
|
-
self.name = self.persona_template_args.get("name", role.value)
|
|
123
|
+
self.name: str = cast(str, self.persona_template_args.get("name", role.value))
|
|
122
124
|
else:
|
|
123
|
-
self.name = self.persona_template_args.get("chatbot_name", role.value) or model.name
|
|
125
|
+
self.name = cast(str, self.persona_template_args.get("chatbot_name", role.value)) or model.name
|
|
124
126
|
self.model = model
|
|
125
127
|
|
|
126
128
|
self.logger = logging.getLogger(repr(self))
|
|
127
|
-
self.conversation_starter
|
|
129
|
+
self.conversation_starter: Optional[Union[str, jinja2.Template, Dict]] = None
|
|
128
130
|
if role == ConversationRole.USER:
|
|
129
131
|
if "conversation_starter" in self.persona_template_args:
|
|
130
132
|
conversation_starter_content = self.persona_template_args["conversation_starter"]
|
|
@@ -135,7 +137,7 @@ class ConversationBot:
|
|
|
135
137
|
self.conversation_starter = jinja2.Template(
|
|
136
138
|
conversation_starter_content, undefined=jinja2.StrictUndefined
|
|
137
139
|
)
|
|
138
|
-
except jinja2.exceptions.TemplateSyntaxError: # noqa: F841
|
|
140
|
+
except jinja2.exceptions.TemplateSyntaxError as e: # noqa: F841
|
|
139
141
|
self.conversation_starter = conversation_starter_content
|
|
140
142
|
else:
|
|
141
143
|
self.logger.info(
|
|
@@ -144,11 +146,12 @@ class ConversationBot:
|
|
|
144
146
|
|
|
145
147
|
async def generate_response(
|
|
146
148
|
self,
|
|
147
|
-
session: AsyncHttpPipeline,
|
|
149
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
148
150
|
conversation_history: List[ConversationTurn],
|
|
149
151
|
max_history: int,
|
|
150
152
|
turn_number: int = 0,
|
|
151
|
-
|
|
153
|
+
session_state: Optional[Dict[str, Any]] = None,
|
|
154
|
+
) -> Tuple[dict, dict, float, dict]:
|
|
152
155
|
"""
|
|
153
156
|
Prompt the ConversationBot for a response.
|
|
154
157
|
|
|
@@ -161,7 +164,7 @@ class ConversationBot:
|
|
|
161
164
|
:param turn_number: Parameters used to query GPT-4 model.
|
|
162
165
|
:type turn_number: int
|
|
163
166
|
:return: The response from the ConversationBot.
|
|
164
|
-
:rtype: Tuple[dict, dict,
|
|
167
|
+
:rtype: Tuple[dict, dict, float, dict]
|
|
165
168
|
"""
|
|
166
169
|
|
|
167
170
|
# check if this is the first turn and the conversation_starter is not None,
|
|
@@ -169,11 +172,14 @@ class ConversationBot:
|
|
|
169
172
|
if turn_number == 0 and self.conversation_starter is not None:
|
|
170
173
|
# if conversation_starter is a dictionary, pass it into samples as is
|
|
171
174
|
if isinstance(self.conversation_starter, dict):
|
|
172
|
-
samples = [self.conversation_starter]
|
|
175
|
+
samples: List[Union[str, jinja2.Template, Dict]] = [self.conversation_starter]
|
|
173
176
|
if isinstance(self.conversation_starter, jinja2.Template):
|
|
174
177
|
samples = [self.conversation_starter.render(**self.persona_template_args)]
|
|
175
178
|
else:
|
|
176
|
-
samples = [self.conversation_starter]
|
|
179
|
+
samples = [self.conversation_starter]
|
|
180
|
+
jailbreak_string = self.persona_template_args.get("jailbreak_string", None)
|
|
181
|
+
if jailbreak_string:
|
|
182
|
+
samples = [f"{jailbreak_string} {samples[0]}"]
|
|
177
183
|
time_taken = 0
|
|
178
184
|
|
|
179
185
|
finish_reason = ["stop"]
|
|
@@ -238,7 +244,7 @@ class CallbackConversationBot(ConversationBot):
|
|
|
238
244
|
self,
|
|
239
245
|
callback: Callable,
|
|
240
246
|
user_template: str,
|
|
241
|
-
user_template_parameters:
|
|
247
|
+
user_template_parameters: TemplateParameters,
|
|
242
248
|
*args,
|
|
243
249
|
**kwargs,
|
|
244
250
|
) -> None:
|
|
@@ -250,18 +256,19 @@ class CallbackConversationBot(ConversationBot):
|
|
|
250
256
|
|
|
251
257
|
async def generate_response(
|
|
252
258
|
self,
|
|
253
|
-
session: AsyncHttpPipeline,
|
|
259
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
254
260
|
conversation_history: List[Any],
|
|
255
261
|
max_history: int,
|
|
256
262
|
turn_number: int = 0,
|
|
257
|
-
|
|
263
|
+
session_state: Optional[Dict[str, Any]] = None,
|
|
264
|
+
) -> Tuple[dict, dict, float, dict]:
|
|
258
265
|
chat_protocol_message = self._to_chat_protocol(
|
|
259
266
|
self.user_template, conversation_history, self.user_template_parameters
|
|
260
267
|
)
|
|
261
268
|
msg_copy = copy.deepcopy(chat_protocol_message)
|
|
262
269
|
result = {}
|
|
263
270
|
start_time = time.time()
|
|
264
|
-
result = await self.callback(msg_copy)
|
|
271
|
+
result = await self.callback(msg_copy, session_state=session_state)
|
|
265
272
|
end_time = time.time()
|
|
266
273
|
if not result:
|
|
267
274
|
result = {
|
|
@@ -270,8 +277,6 @@ class CallbackConversationBot(ConversationBot):
|
|
|
270
277
|
"id": None,
|
|
271
278
|
"template_parameters": {},
|
|
272
279
|
}
|
|
273
|
-
self.logger.info("Using user provided callback returning response.")
|
|
274
|
-
|
|
275
280
|
time_taken = end_time - start_time
|
|
276
281
|
try:
|
|
277
282
|
response = {
|
|
@@ -289,8 +294,6 @@ class CallbackConversationBot(ConversationBot):
|
|
|
289
294
|
blame=ErrorBlame.USER_ERROR,
|
|
290
295
|
) from exc
|
|
291
296
|
|
|
292
|
-
self.logger.info("Parsed callback response")
|
|
293
|
-
|
|
294
297
|
return response, {}, time_taken, result
|
|
295
298
|
|
|
296
299
|
# Bug 3354264: template is unused in the method - is this intentional?
|
|
@@ -307,9 +310,134 @@ class CallbackConversationBot(ConversationBot):
|
|
|
307
310
|
}
|
|
308
311
|
|
|
309
312
|
|
|
313
|
+
class MultiModalConversationBot(ConversationBot):
|
|
314
|
+
"""MultiModal Conversation bot that uses a user provided callback to generate responses.
|
|
315
|
+
|
|
316
|
+
:param callback: The callback function to use to generate responses.
|
|
317
|
+
:type callback: Callable
|
|
318
|
+
:param user_template: The template to use for the request.
|
|
319
|
+
:type user_template: str
|
|
320
|
+
:param user_template_parameters: The template parameters to use for the request.
|
|
321
|
+
:type user_template_parameters: Dict
|
|
322
|
+
:param args: Optional arguments to pass to the parent class.
|
|
323
|
+
:type args: Any
|
|
324
|
+
:param kwargs: Optional keyword arguments to pass to the parent class.
|
|
325
|
+
:type kwargs: Any
|
|
326
|
+
"""
|
|
327
|
+
|
|
328
|
+
def __init__(
|
|
329
|
+
self,
|
|
330
|
+
callback: Callable,
|
|
331
|
+
user_template: str,
|
|
332
|
+
user_template_parameters: TemplateParameters,
|
|
333
|
+
rai_client: Union[RAIClient, AIProjectClient],
|
|
334
|
+
*args,
|
|
335
|
+
**kwargs,
|
|
336
|
+
) -> None:
|
|
337
|
+
self.callback = callback
|
|
338
|
+
self.user_template = user_template
|
|
339
|
+
self.user_template_parameters = user_template_parameters
|
|
340
|
+
self.rai_client = rai_client
|
|
341
|
+
|
|
342
|
+
super().__init__(*args, **kwargs)
|
|
343
|
+
|
|
344
|
+
async def generate_response(
|
|
345
|
+
self,
|
|
346
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
347
|
+
conversation_history: List[Any],
|
|
348
|
+
max_history: int,
|
|
349
|
+
turn_number: int = 0,
|
|
350
|
+
session_state: Optional[Dict[str, Any]] = None,
|
|
351
|
+
) -> Tuple[dict, dict, float, dict]:
|
|
352
|
+
previous_prompt = conversation_history[-1]
|
|
353
|
+
chat_protocol_message = await self._to_chat_protocol(conversation_history, self.user_template_parameters)
|
|
354
|
+
|
|
355
|
+
# replace prompt with {image.jpg} tags with image content data.
|
|
356
|
+
conversation_history.pop()
|
|
357
|
+
conversation_history.append(
|
|
358
|
+
ConversationTurn(
|
|
359
|
+
role=previous_prompt.role,
|
|
360
|
+
name=previous_prompt.name,
|
|
361
|
+
message=chat_protocol_message["messages"][0]["content"],
|
|
362
|
+
full_response=previous_prompt.full_response,
|
|
363
|
+
request=chat_protocol_message,
|
|
364
|
+
)
|
|
365
|
+
)
|
|
366
|
+
msg_copy = copy.deepcopy(chat_protocol_message)
|
|
367
|
+
result = {}
|
|
368
|
+
start_time = time.time()
|
|
369
|
+
result = await self.callback(msg_copy)
|
|
370
|
+
end_time = time.time()
|
|
371
|
+
if not result:
|
|
372
|
+
result = {
|
|
373
|
+
"messages": [{"content": "Callback did not return a response.", "role": "assistant"}],
|
|
374
|
+
"finish_reason": ["stop"],
|
|
375
|
+
"id": None,
|
|
376
|
+
"template_parameters": {},
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
time_taken = end_time - start_time
|
|
380
|
+
try:
|
|
381
|
+
response = {
|
|
382
|
+
"samples": [result["messages"][-1]["content"]],
|
|
383
|
+
"finish_reason": ["stop"],
|
|
384
|
+
"id": None,
|
|
385
|
+
}
|
|
386
|
+
except Exception as exc:
|
|
387
|
+
msg = "User provided callback does not conform to chat protocol standard."
|
|
388
|
+
raise EvaluationException(
|
|
389
|
+
message=msg,
|
|
390
|
+
internal_message=msg,
|
|
391
|
+
target=ErrorTarget.CALLBACK_CONVERSATION_BOT,
|
|
392
|
+
category=ErrorCategory.INVALID_VALUE,
|
|
393
|
+
blame=ErrorBlame.USER_ERROR,
|
|
394
|
+
) from exc
|
|
395
|
+
|
|
396
|
+
return response, chat_protocol_message, time_taken, result
|
|
397
|
+
|
|
398
|
+
async def _to_chat_protocol(self, conversation_history, template_parameters): # pylint: disable=unused-argument
|
|
399
|
+
messages = []
|
|
400
|
+
|
|
401
|
+
for _, m in enumerate(conversation_history):
|
|
402
|
+
if "image:" in m.message:
|
|
403
|
+
content = await self._to_multi_modal_content(m.message)
|
|
404
|
+
messages.append({"content": content, "role": m.role.value})
|
|
405
|
+
else:
|
|
406
|
+
messages.append({"content": m.message, "role": m.role.value})
|
|
407
|
+
|
|
408
|
+
return {
|
|
409
|
+
"template_parameters": template_parameters,
|
|
410
|
+
"messages": messages,
|
|
411
|
+
"$schema": "http://azureml/sdk-2-0/ChatConversation.json",
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
async def _to_multi_modal_content(self, text: str) -> list:
|
|
415
|
+
split_text = re.findall(r"[^{}]+|\{[^{}]*\}", text)
|
|
416
|
+
messages = [
|
|
417
|
+
text.strip("{}").replace("image:", "").strip() if text.startswith("{") else text for text in split_text
|
|
418
|
+
]
|
|
419
|
+
contents = []
|
|
420
|
+
for msg in messages:
|
|
421
|
+
if msg.startswith("image_understanding/"):
|
|
422
|
+
if isinstance(self.rai_client, RAIClient):
|
|
423
|
+
encoded_image = await self.rai_client.get_image_data(msg)
|
|
424
|
+
else:
|
|
425
|
+
response = self.rai_client.red_teams.get_template_parameters_image(path=msg, stream="true")
|
|
426
|
+
image_data = b"".join(response)
|
|
427
|
+
encoded_image = base64.b64encode(image_data).decode("utf-8")
|
|
428
|
+
|
|
429
|
+
contents.append(
|
|
430
|
+
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
|
|
431
|
+
)
|
|
432
|
+
else:
|
|
433
|
+
contents.append({"type": "text", "text": msg})
|
|
434
|
+
return contents
|
|
435
|
+
|
|
436
|
+
|
|
310
437
|
__all__ = [
|
|
311
438
|
"ConversationRole",
|
|
312
439
|
"ConversationBot",
|
|
313
440
|
"CallbackConversationBot",
|
|
441
|
+
"MultiModalConversationBot",
|
|
314
442
|
"ConversationTurn",
|
|
315
443
|
]
|
|
@@ -4,14 +4,14 @@
|
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
6
|
import logging
|
|
7
|
-
from typing import Callable, Dict, List, Tuple, Union
|
|
7
|
+
from typing import Callable, Dict, List, Optional, Tuple, Union
|
|
8
8
|
|
|
9
9
|
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
|
|
10
10
|
from azure.ai.evaluation.simulator._constants import SupportedLanguages
|
|
11
11
|
from azure.ai.evaluation.simulator._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
|
|
12
|
-
|
|
13
12
|
from ..._http_utils import AsyncHttpPipeline
|
|
14
13
|
from . import ConversationBot, ConversationTurn
|
|
14
|
+
from azure.ai.evaluation._common.onedp._client import ProjectsClient as AIProjectClient
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def is_closing_message(response: Union[Dict, str], recursion_depth: int = 0) -> bool:
|
|
@@ -73,14 +73,14 @@ def is_closing_message_helper(response: str) -> bool:
|
|
|
73
73
|
async def simulate_conversation(
|
|
74
74
|
*,
|
|
75
75
|
bots: List[ConversationBot],
|
|
76
|
-
session: AsyncHttpPipeline,
|
|
76
|
+
session: Union[AsyncHttpPipeline, AIProjectClient],
|
|
77
77
|
language: SupportedLanguages,
|
|
78
78
|
stopping_criteria: Callable[[str], bool] = is_closing_message,
|
|
79
79
|
turn_limit: int = 10,
|
|
80
80
|
history_limit: int = 5,
|
|
81
81
|
api_call_delay_sec: float = 0,
|
|
82
82
|
logger: logging.Logger = logging.getLogger(__name__),
|
|
83
|
-
) -> Tuple:
|
|
83
|
+
) -> Tuple[Optional[str], List[ConversationTurn]]:
|
|
84
84
|
"""
|
|
85
85
|
Simulate a conversation between the given bots.
|
|
86
86
|
|
|
@@ -99,9 +99,10 @@ async def simulate_conversation(
|
|
|
99
99
|
:keyword logger: The logger to use for logging. Defaults to the logger named after the current module.
|
|
100
100
|
:paramtype logger: logging.Logger
|
|
101
101
|
:return: The simulated conversation between the given bots.
|
|
102
|
-
:rtype: Tuple
|
|
102
|
+
:rtype: Tuple[Optional[str], List[ConversationTurn]]
|
|
103
103
|
"""
|
|
104
104
|
|
|
105
|
+
session_state = {}
|
|
105
106
|
# Read the first prompt.
|
|
106
107
|
(first_response, request, _, full_response) = await bots[0].generate_response(
|
|
107
108
|
session=session,
|
|
@@ -110,7 +111,7 @@ async def simulate_conversation(
|
|
|
110
111
|
turn_number=0,
|
|
111
112
|
)
|
|
112
113
|
if "id" in first_response:
|
|
113
|
-
conversation_id = first_response["id"]
|
|
114
|
+
conversation_id: Optional[str] = first_response["id"]
|
|
114
115
|
else:
|
|
115
116
|
conversation_id = None
|
|
116
117
|
first_prompt = first_response["samples"][0]
|
|
@@ -150,7 +151,10 @@ async def simulate_conversation(
|
|
|
150
151
|
conversation_history=conversation_history,
|
|
151
152
|
max_history=history_limit,
|
|
152
153
|
turn_number=current_turn,
|
|
154
|
+
session_state=session_state,
|
|
153
155
|
)
|
|
156
|
+
if "session_state" in full_response and full_response["session_state"] is not None:
|
|
157
|
+
session_state.update(full_response["session_state"])
|
|
154
158
|
|
|
155
159
|
# check if conversation id is null, which means conversation starter was used. use id from next turn
|
|
156
160
|
if conversation_id is None and "id" in response:
|
|
@@ -12,7 +12,7 @@ OUTPUT_FILE = "openai_api_response.jsonl"
|
|
|
12
12
|
|
|
13
13
|
# Azure endpoint constants
|
|
14
14
|
AZUREML_TOKEN_SCOPE = "https://ml.azure.com"
|
|
15
|
-
COGNITIVE_SERVICES_TOKEN_SCOPE = "https://
|
|
15
|
+
COGNITIVE_SERVICES_TOKEN_SCOPE = "https://ai.azure.com/"
|
|
16
16
|
AZURE_TOKEN_REFRESH_INTERVAL = 600 # seconds
|
|
17
17
|
AZURE_ENDPOINT_DOMAIN_VALID_PATTERN_RE = (
|
|
18
18
|
r"^(?=.{1,255}$)(?!-)[a-zA-Z0-9-]{1,63}(?<!-)"
|