azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation might be problematic.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
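Most of the 1.x surface shows up in this listing: the new AOAI graders under `_aoai/`, agent-focused evaluators (intent resolution, task adherence, tool call accuracy), the `red_team` module, and a vendored `rouge_score`. For orientation, here is a minimal sketch of calling one of the listed evaluators directly; the keyword names and the result key follow the evaluator's usual public call pattern and are not shown in this diff, so treat them as assumptions.

```python
# Minimal sketch (assumed public API, not taken from this diff): the BLEU
# evaluator listed above is a callable class that scores a response against
# a reference string and returns a dict of metrics.
from azure.ai.evaluation import BleuScoreEvaluator

bleu = BleuScoreEvaluator()
result = bleu(
    response="Tokyo is the capital of Japan.",
    ground_truth="The capital of Japan is Tokyo.",
)
print(result)  # expected shape: {"bleu_score": <float>}
```

The sections below reproduce the diffs for two of the changed files: the new target-run context manager and the reworked `_eval_run.py`.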
azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py (new file):

```diff
@@ -0,0 +1,62 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import os
+import types
+from typing import Optional, Type
+
+from azure.ai.evaluation._evaluate._batch_run.batch_clients import BatchClient
+from azure.ai.evaluation._evaluate._batch_run import RunSubmitterClient
+from azure.ai.evaluation._legacy._adapters._constants import PF_FLOW_ENTRY_IN_TMP
+from azure.ai.evaluation._legacy._batch_engine._openai_injector import (
+    inject_openai_api as ported_inject_openai_api,
+    recover_openai_api as ported_recover_openai_api,
+)
+from azure.ai.evaluation._constants import PF_DISABLE_TRACING
+from azure.ai.evaluation._evaluate._utils import set_event_loop_policy
+
+
+class TargetRunContext:
+    """Context manager for target batch run.
+
+    :param upload_snapshot: Whether to upload target snapshot.
+    :type upload_snapshot: bool
+    """
+
+    def __init__(self, client: BatchClient, upload_snapshot: bool = False) -> None:
+        self._client = client
+        self._upload_snapshot = upload_snapshot
+        self._original_cwd = os.getcwd()
+
+    def __enter__(self) -> None:
+        # Preserve current working directory, as PF may change it without restoring it afterward
+        self._original_cwd = os.getcwd()
+
+        # Address "[WinError 32] The process cannot access the file" error,
+        # caused by conflicts when the venv and target function are in the same directory.
+        # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
+        if not self._upload_snapshot:
+            os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
+
+        os.environ[PF_DISABLE_TRACING] = "true"
+
+        if isinstance(self._client, RunSubmitterClient):
+            ported_inject_openai_api()
+            # For addressing the issue of asyncio event loop closed on Windows
+            set_event_loop_policy()
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        exc_tb: Optional[types.TracebackType],
+    ) -> None:
+        os.chdir(self._original_cwd)
+
+        if not self._upload_snapshot:
+            os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
+
+        os.environ.pop(PF_DISABLE_TRACING, None)
+
+        if isinstance(self._client, RunSubmitterClient):
+            ported_recover_openai_api()
```
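Since `TargetRunContext` is new in this release, a short usage sketch may help. The stand-in client below is hypothetical (RunSubmitterClient's constructor is not shown in this diff); the point is only to illustrate the environment-variable handling on enter and exit.

```python
# Sketch only: exercising TargetRunContext with a stand-in client.
# _StubClient is hypothetical; any object passed as the client works here
# because the context manager only checks isinstance(..., RunSubmitterClient).
import os

from azure.ai.evaluation._constants import PF_DISABLE_TRACING
from azure.ai.evaluation._evaluate._batch_run.target_run_context import TargetRunContext


class _StubClient:
    pass


with TargetRunContext(_StubClient()):  # upload_snapshot defaults to False
    # Inside the block the flag is set, and PF_FLOW_ENTRY_IN_TMP is also "true".
    assert os.environ[PF_DISABLE_TRACING] == "true"

# Both PF_FLOW_ENTRY_IN_TMP and PF_DISABLE_TRACING are popped again on exit.
assert PF_DISABLE_TRACING not in os.environ
```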
azure/ai/evaluation/_evaluate/_eval_run.py:

```diff
@@ -10,37 +10,24 @@ import posixpath
 import time
 import types
 import uuid
-from typing import Any, Dict, Optional, Set, Type
+from typing import Any, Dict, List, Optional, Set, Type
 from urllib.parse import urlparse

-from
+from azure.ai.evaluation._legacy._adapters.entities import Run
+from typing_extensions import Self

 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_http_client
 from azure.ai.evaluation._version import VERSION
 from azure.core.pipeline.policies import RetryPolicy
 from azure.core.rest import HttpResponse
+from azure.core.exceptions import HttpResponseError
+from azure.storage.blob import BlobServiceClient
+from azure.ai.evaluation._azure._clients import LiteMLClient

 LOGGER = logging.getLogger(__name__)


-# Handle optional import. The azure libraries are only present if
-# promptflow-azure is installed.
-try:
-    from azure.ai.ml.entities._credentials import AccountKeyConfiguration  # pylint: disable=ungrouped-imports
-    from azure.ai.ml.entities._datastore.datastore import Datastore
-    from azure.storage.blob import BlobServiceClient
-except (ModuleNotFoundError, ImportError):
-    # If the above mentioned modules cannot be imported, we are running
-    # in local mode and MLClient in the constructor will be None, so
-    # we will not arrive to Azure-dependent code.
-
-    # We are logging the import failure only if debug logging level is set because:
-    # - If the project configuration was not provided this import is not needed.
-    # - If the project configuration was provided, the error will be raised by PFClient.
-    LOGGER.debug("promptflow.azure is not installed.")
-
-
 @dataclasses.dataclass
 class RunInfo:
     """
@@ -89,18 +76,20 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
     :type group_name: str
     :param workspace_name: The name of workspace/project used to track run.
     :type workspace_name: str
-    :param
-    :type
+    :param management_client: The trace destination string to parse the AI ML workspace blob store from.
+    :type management_client:
+        ~azure.ai.evaluation._promptflow.azure._lite_azure_management_client.LiteMLClient
     :param promptflow_run: The promptflow run used by the
+    :type promptflow_run: Optional[promptflow._sdk.entities.Run]
+    :param tags: A dictionary of tags to be added to the evaluation run for tracking and organization purposes.
+    :type tags: Optional[Dict[str, str]]
     """

     _MAX_RETRIES = 5
     _BACKOFF_FACTOR = 2
     _TIMEOUT = 5
-    _SCOPE = "https://management.azure.com/.default"

     EVALUATION_ARTIFACT = "instance_results.jsonl"
-    EVALUATION_ARTIFACT_DUMMY_RUN = "eval_results.jsonl"

     def __init__(
         self,
@@ -109,20 +98,22 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         subscription_id: str,
         group_name: str,
         workspace_name: str,
-
+        management_client: LiteMLClient,
         promptflow_run: Optional[Run] = None,
+        tags: Optional[Dict[str, str]] = None,
     ) -> None:
         self._tracking_uri: str = tracking_uri
         self._subscription_id: str = subscription_id
         self._resource_group_name: str = group_name
         self._workspace_name: str = workspace_name
-        self.
+        self._management_client: LiteMLClient = management_client
         self._is_promptflow_run: bool = promptflow_run is not None
         self._run_name = run_name
         self._promptflow_run = promptflow_run
+        self._tags = tags or {}
         self._status = RunStatus.NOT_STARTED
-        self._url_base = None
-        self.
+        self._url_base: Optional[str] = None
+        self._info: Optional[RunInfo] = None

     @property
     def status(self) -> RunStatus:
@@ -134,6 +125,20 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
         """
         return self._status

+    @property
+    def info(self) -> RunInfo:
+        if self._info is None:
+            msg = "Run info is missing"
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.EVAL_RUN,
+                category=ErrorCategory.UNKNOWN,
+                blame=ErrorBlame.UNKNOWN,
+            )
+
+        return self._info
+
     def _get_scope(self) -> str:
         """
         Return the scope information for the workspace.
@@ -161,28 +166,37 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
            )
            self._url_base = None
            self._status = RunStatus.BROKEN
-           self.
+           self._info = RunInfo.generate(self._run_name)
        else:
            self._url_base = urlparse(self._tracking_uri).netloc
            if self._promptflow_run is not None:
-               self.
+               self._info = RunInfo(
                    self._promptflow_run.name,
-                   self._promptflow_run._experiment_name,  # pylint: disable=protected-access
+                   self._promptflow_run._experiment_name or "",  # pylint: disable=protected-access
                    self._promptflow_run.name,
                )
            else:
                url = f"https://{self._url_base}/mlflow/v2.0" f"{self._get_scope()}/api/2.0/mlflow/runs/create"
+
+               # Prepare tags: start with user tags, ensure mlflow.user is set
+               run_tags = self._tags.copy()
+               if "mlflow.user" not in run_tags:
+                   run_tags["mlflow.user"] = "azure-ai-evaluation"
+
+               # Convert tags to MLflow format
+               tags_list = [{"key": key, "value": value} for key, value in run_tags.items()]
+
                body = {
                    "experiment_id": "0",
                    "user_id": "azure-ai-evaluation",
                    "start_time": int(time.time() * 1000),
-                   "tags":
+                   "tags": tags_list,
                }
                if self._run_name:
                    body["run_name"] = self._run_name
                response = self.request_with_retry(url=url, method="POST", json_dict=body)
                if response.status_code != 200:
-                   self.
+                   self._info = RunInfo.generate(self._run_name)
                    LOGGER.warning(
                        "The run failed to start: %s: %s."
                        "The results will be saved locally, but will not be logged to Azure.",
```
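The tag handling above is a straightforward dict-to-list conversion into MLflow's CreateRun format. A standalone sketch of the same transformation, with invented tag values for illustration:

```python
# Sketch of the tag conversion performed before the mlflow/runs/create call.
# The tag values below are made up; the "mlflow.user" default mirrors the
# behaviour added in this hunk.
user_tags = {"team": "search-quality", "experiment": "baseline-v2"}

run_tags = user_tags.copy()
if "mlflow.user" not in run_tags:
    run_tags["mlflow.user"] = "azure-ai-evaluation"

tags_list = [{"key": key, "value": value} for key, value in run_tags.items()]
# e.g. [{"key": "team", "value": "search-quality"},
#       {"key": "experiment", "value": "baseline-v2"},
#       {"key": "mlflow.user", "value": "azure-ai-evaluation"}]
```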
```diff
@@ -192,7 +206,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
                    self._status = RunStatus.BROKEN
                else:
                    parsed_response = response.json()
-                   self.
+                   self._info = RunInfo(
                        run_id=parsed_response["run"]["info"]["run_id"],
                        experiment_id=parsed_response["run"]["info"]["experiment_id"],
                        run_name=parsed_response["run"]["info"]["run_name"],
@@ -235,7 +249,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
            LOGGER.warning("Unable to terminate the run.")
        self._status = RunStatus.TERMINATED

-    def __enter__(self):
+    def __enter__(self) -> Self:
        """The Context Manager enter call.

        :return: The instance of the class.
```
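The only change in the second hunk above is the return annotation: `__enter__` now advertises `typing_extensions.Self`, matching the import added earlier. A minimal, generic illustration of the pattern, unrelated to EvalRun itself:

```python
# Generic sketch of the Self-typed context-manager pattern adopted above.
from typing_extensions import Self


class Managed:
    def __enter__(self) -> Self:
        # Type checkers now infer the concrete subclass from "with Managed() as m".
        return self

    def __exit__(self, exc_type, exc_value, exc_tb) -> None:
        pass


with Managed() as m:
    assert isinstance(m, Managed)
```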
```diff
@@ -249,7 +263,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        exc_tb: Optional[types.TracebackType],
-    ) ->
+    ) -> None:
        """The context manager exit call.

        :param exc_type: The exception type
@@ -293,12 +307,8 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
        """
        return f"https://{self._url_base}" "/mlflow/v2.0" f"{self._get_scope()}" f"/api/2.0/mlflow/runs/log-metric"

-    def _get_token(self):
-
-        # is an optional dependency.
-        from promptflow.azure._utils._token_cache import ArmTokenCache  # pylint: disable=import-error,no-name-in-module
-
-        return ArmTokenCache().get_token(self._ml_client._credential)  # pylint: disable=protected-access
+    def _get_token(self) -> str:
+        return self._management_client.get_token().token

    def request_with_retry(
        self, url: str, method: str, json_dict: Dict[str, Any], headers: Optional[Dict[str, str]] = None
```
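The replacement `_get_token` drops the promptflow ArmTokenCache and asks the new LiteMLClient for a token object whose `.token` attribute is the bearer string. That shape is consistent with azure-core's `AccessToken`; the snippet below only illustrates that assumed shape, not LiteMLClient's actual return type.

```python
# Sketch of the token shape _get_token relies on (assumed to match
# azure.core.credentials.AccessToken: a named tuple of token + expires_on).
from azure.core.credentials import AccessToken

tok = AccessToken(token="<bearer-token>", expires_on=1_700_000_000)
auth_header = {"Authorization": f"Bearer {tok.token}"}
```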
```diff
@@ -396,7 +406,7 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
        """
        if not self._check_state_and_log("log artifact", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
            return
-        # Check if artifact
+        # Check if artifact directory is empty or does not exist.
        if not os.path.isdir(artifact_folder):
            LOGGER.warning("The path to the artifact is either not a directory or does not exist.")
            return
@@ -407,8 +417,8 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
            LOGGER.warning("The run results file was not found, skipping artifacts upload.")
            return
        # First we will list the files and the appropriate remote paths for them.
-        root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.
-        remote_paths = {"paths": []}
+        root_upload_path = posixpath.join("promptflow", "PromptFlowArtifacts", self.info.run_id)
+        remote_paths: Dict[str, List[Dict[str, str]]] = {"paths": []}
        local_paths = []
        # Go over the artifact folder and upload all artifacts.
        for root, _, filenames in os.walk(artifact_folder):
@@ -424,18 +434,38 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
                local_paths.append(local_file_path)

        # We will write the artifacts to the workspaceblobstore
-        datastore = self.
+        datastore = self._management_client.workspace_get_default_datastore(
+            self._workspace_name, include_credentials=True
+        )
        account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
-
-
-
-
-        blob_client.
+
+        svc_client = BlobServiceClient(account_url=account_url, credential=datastore.credential)
+        try:
+            for local, remote in zip(local_paths, remote_paths["paths"]):
+                blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+                with open(local, "rb") as fp:
+                    blob_client.upload_blob(fp, overwrite=True)
+        except HttpResponseError as ex:
+            if ex.status_code == 403:
+                msg = (
+                    "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
+                    " Please ensure that the necessary access rights are granted."
+                )
+                raise EvaluationException(
+                    message=msg,
+                    target=ErrorTarget.EVAL_RUN,
+                    category=ErrorCategory.FAILED_REMOTE_TRACKING,
+                    blame=ErrorBlame.USER_ERROR,
+                    tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
+                ) from ex
+
+            raise ex

        # To show artifact in UI we will need to register it. If it is a promptflow run,
        # we are rewriting already registered artifact and need to skip this step.
        if self._is_promptflow_run:
            return
+
        url = (
            f"https://{self._url_base}/artifact/v2.0/subscriptions/{self._subscription_id}"
            f"/resourceGroups/{self._resource_group_name}/providers/"
```
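The upload loop above is the standard azure-storage-blob pattern. A self-contained sketch of that pattern outside EvalRun; the account URL, container, credential, and file names are placeholders:

```python
# Stand-alone sketch of the blob upload pattern used by log_artifact.
# account_url, container, credential and the file names are placeholders.
from azure.core.exceptions import HttpResponseError
from azure.storage.blob import BlobServiceClient

account_url = "https://<storage-account>.blob.core.windows.net"
credential = "<account-key-or-token-credential>"

svc_client = BlobServiceClient(account_url=account_url, credential=credential)
blob_path = "promptflow/PromptFlowArtifacts/<run-id>/instance_results.jsonl"
try:
    blob_client = svc_client.get_blob_client(container="<container>", blob=blob_path)
    with open("instance_results.jsonl", "rb") as fp:
        blob_client.upload_blob(fp, overwrite=True)
except HttpResponseError as ex:
    # A 403 here usually means the caller lacks data-plane access to the store.
    if ex.status_code == 403:
        raise PermissionError("Insufficient permission to access the workspace blob store") from ex
    raise
```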
```diff
@@ -458,15 +488,28 @@ class EvalRun(contextlib.AbstractContextManager): # pylint: disable=too-many-in
        if response.status_code != 200:
            self._log_warning("register artifact", response)

-
-
-
-
-
-
-
-
-
+        # register artifacts for images if exists in image folder
+        try:
+            for remote_path in remote_paths["paths"]:
+                remote_file_path = remote_path["path"]
+                if "images" in os.path.normpath(remote_file_path).split(os.sep):
+                    response = self.request_with_retry(
+                        url=url,
+                        method="POST",
+                        json_dict={
+                            "origin": "ExperimentRun",
+                            "container": f"dcid.{self.info.run_id}",
+                            "path": posixpath.join("images", os.path.basename(remote_file_path)),
+                            "dataPath": {
+                                "dataStoreName": datastore.name,
+                                "relativePath": remote_file_path,
+                            },
+                        },
+                    )
+                    if response.status_code != 200:
+                        self._log_warning("register image artifact", response)
+        except Exception as ex:  # pylint: disable=broad-exception-caught
+            LOGGER.debug("Exception occurred while registering image artifact. ex: %s", ex)

    def log_metric(self, key: str, value: float) -> None:
        """
```