azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +83 -14
- azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/_common/constants.py +124 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +578 -69
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +505 -27
- azure/ai/evaluation/_constants.py +148 -0
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
- azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
- azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
- azure/ai/evaluation/_evaluate/_utils.py +237 -42
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
- azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +427 -29
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
- azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
- azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
- azure/ai/evaluation/_exceptions.py +24 -1
- azure/ai/evaluation/_http_utils.py +7 -5
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +26 -0
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
- azure/ai/evaluation/simulator/_constants.py +1 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
- azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
- azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- azure/ai/evaluation/simulator/_simulator.py +43 -19
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/METADATA +366 -27
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info/licenses}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
from typing_extensions import overload, override
|
|
5
|
+
from typing import Dict, Union
|
|
6
|
+
|
|
7
|
+
from azure.ai.evaluation._common._experimental import experimental
|
|
8
|
+
from azure.ai.evaluation._common.constants import EvaluationMetrics
|
|
9
|
+
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@experimental
class UngroundedAttributesEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
    """
    Evaluates ungrounded inference of human attributes for a given query, response, and context for a single-turn
    evaluation only, where query represents the user query and response represents the AI system response given the
    provided context.

    Ungrounded Attributes first checks whether a response is ungrounded, and then checks whether it contains
    information about the protected class or emotional state of a person.

    It identifies the following attributes:

    - emotional_state
    - protected_class
    - groundedness

    :param credential: The credential for connecting to Azure AI project. Required
    :type credential: ~azure.core.credentials.TokenCredential
    :param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
        or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
    :type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
    :param kwargs: Additional arguments to pass to the evaluator. ``evaluate_query`` is set to ``True`` when the
        caller does not supply it; every keyword argument is forwarded to the base class.
    :type kwargs: Any

    .. note::

        If this evaluator is supplied to the `evaluate` function, the metric
        for the ungrounded attributes will be "ungrounded_attributes_label".
    """

    id = "azureai://built-in/evaluators/ungrounded_attributes"
    """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
    _OPTIONAL_PARAMS = ["query"]

    @override
    def __init__(
        self,
        credential,
        azure_ai_project,
        **kwargs,
    ):
        # Default evaluate_query to True, but never override a value the caller passed explicitly.
        kwargs.setdefault("evaluate_query", True)

        super().__init__(
            eval_metric=EvaluationMetrics.UNGROUNDED_ATTRIBUTES,
            azure_ai_project=azure_ai_project,
            credential=credential,
            **kwargs,
        )

    @overload
    def __call__(
        self,
        *,
        query: str,
        response: str,
        context: str,
    ) -> Dict[str, Union[str, bool]]:
        """Evaluate a given query/response pair and context for ungrounded attributes

        :keyword query: The query to be evaluated.
        :paramtype query: str
        :keyword response: The response to be evaluated.
        :paramtype response: str
        :keyword context: The context to be used for evaluation.
        :paramtype context: str
        :return: The ungrounded attributes label.
        :rtype: Dict[str, Union[str, bool]]
        """

    @override
    def __call__(  # pylint: disable=docstring-missing-param
        self,
        *args,
        **kwargs,
    ):
        """Evaluate a given query/response pair and context for ungrounded attributes

        :keyword query: The query to be evaluated.
        :paramtype query: str
        :keyword response: The response to be evaluated.
        :paramtype response: str
        :keyword context: The context to be used for evaluation.
        :paramtype context: str
        :return: The ungrounded attributes label.
        :rtype: Dict[str, Union[str, bool]]
        """
        # All work is delegated to the base class; this override exists to carry the typed overload above.
        return super().__call__(*args, **kwargs)
|
|
@@ -40,9 +40,11 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
|
|
|
40
40
|
|
|
41
41
|
:param credential: The credential for connecting to Azure AI project. Required
|
|
42
42
|
:type credential: ~azure.core.credentials.TokenCredential
|
|
43
|
-
:param azure_ai_project: The
|
|
44
|
-
name.
|
|
45
|
-
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
|
|
43
|
+
:param azure_ai_project: The Azure AI project, which can either be a string representing the project endpoint
|
|
44
|
+
or an instance of AzureAIProject. It contains subscription id, resource group, and project name.
|
|
45
|
+
:type azure_ai_project: Union[str, ~azure.ai.evaluation.AzureAIProject]
|
|
46
|
+
:param threshold: The threshold for the IndirectAttack evaluator. Default is 0.
|
|
47
|
+
:type threshold: int
|
|
46
48
|
|
|
47
49
|
.. admonition:: Example:
|
|
48
50
|
|
|
@@ -52,21 +54,35 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase[Union[str, bool]]):
|
|
|
52
54
|
:language: python
|
|
53
55
|
:dedent: 8
|
|
54
56
|
:caption: Initialize and call an IndirectAttackEvaluator.
|
|
57
|
+
|
|
58
|
+
.. admonition:: Example using Azure AI Project URL:
|
|
59
|
+
|
|
60
|
+
.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
|
|
61
|
+
:start-after: [START indirect_attack_evaluator]
|
|
62
|
+
:end-before: [END indirect_attack_evaluator]
|
|
63
|
+
:language: python
|
|
64
|
+
:dedent: 8
|
|
65
|
+
:caption: Initialize and call IndirectAttackEvaluator using Azure AI Project URL in the following format
|
|
66
|
+
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
|
|
67
|
+
|
|
55
68
|
"""
|
|
56
69
|
|
|
57
|
-
id = "
|
|
70
|
+
id = "azureai://built-in/evaluators/indirect_attack"
|
|
58
71
|
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""
|
|
72
|
+
_OPTIONAL_PARAMS = ["query"]
|
|
59
73
|
|
|
60
74
|
@override
|
|
61
75
|
def __init__(
|
|
62
76
|
self,
|
|
63
77
|
credential,
|
|
64
78
|
azure_ai_project,
|
|
79
|
+
**kwargs,
|
|
65
80
|
):
|
|
66
81
|
super().__init__(
|
|
67
82
|
eval_metric=EvaluationMetrics.XPIA,
|
|
68
83
|
azure_ai_project=azure_ai_project,
|
|
69
84
|
credential=credential,
|
|
85
|
+
**kwargs,
|
|
70
86
|
)
|
|
71
87
|
|
|
72
88
|
@overload
|
|
@@ -9,6 +9,15 @@ from typing import Optional
|
|
|
9
9
|
from azure.core.exceptions import AzureError
|
|
10
10
|
|
|
11
11
|
|
|
12
|
+
class ErrorMessage(Enum):
    """Canonical message texts for use when raising EvaluationException.

    Keeping the wording in one place gives the SDK a consistent error message format.
    """

    # Adjacent string literals are joined at compile time, so the value is still one single-line string.
    MALFORMED_CONVERSATION_HISTORY = (
        "Malformed Conversation History: "
        "Query parameter representing conversation history "
        "should have exactly one more user query than agent responses"
    )
|
|
19
|
+
|
|
20
|
+
|
|
12
21
|
class ErrorCategory(Enum):
|
|
13
22
|
"""Error category to be specified when using EvaluationException class.
|
|
14
23
|
|
|
@@ -38,6 +47,8 @@ class ErrorCategory(Enum):
|
|
|
38
47
|
FAILED_REMOTE_TRACKING = "FAILED REMOTE TRACKING"
|
|
39
48
|
PROJECT_ACCESS_ERROR = "PROJECT ACCESS ERROR"
|
|
40
49
|
UNKNOWN = "UNKNOWN"
|
|
50
|
+
UPLOAD_ERROR = "UPLOAD ERROR"
|
|
51
|
+
NOT_APPLICABLE = "NOT APPLICABLE"
|
|
41
52
|
|
|
42
53
|
|
|
43
54
|
class ErrorBlame(Enum):
|
|
@@ -62,16 +73,21 @@ class ErrorTarget(Enum):
|
|
|
62
73
|
CODE_CLIENT = "CodeClient"
|
|
63
74
|
RAI_CLIENT = "RAIClient"
|
|
64
75
|
COHERENCE_EVALUATOR = "CoherenceEvaluator"
|
|
76
|
+
COMPLETENESS_EVALUATOR = "CompletenessEvaluator"
|
|
65
77
|
CONTENT_SAFETY_CHAT_EVALUATOR = "ContentSafetyEvaluator"
|
|
66
|
-
CONTENT_SAFETY_MULTIMODAL_EVALUATOR = "ContentSafetyMultimodalEvaluator"
|
|
67
78
|
ECI_EVALUATOR = "ECIEvaluator"
|
|
68
79
|
F1_EVALUATOR = "F1Evaluator"
|
|
69
80
|
GROUNDEDNESS_EVALUATOR = "GroundednessEvaluator"
|
|
81
|
+
TASK_NAVIGATION_EFFICIENCY_EVALUATOR = "_TaskNavigationEfficiencyEvaluator"
|
|
70
82
|
PROTECTED_MATERIAL_EVALUATOR = "ProtectedMaterialEvaluator"
|
|
83
|
+
INTENT_RESOLUTION_EVALUATOR = "IntentResolutionEvaluator"
|
|
71
84
|
RELEVANCE_EVALUATOR = "RelevanceEvaluator"
|
|
72
85
|
SIMILARITY_EVALUATOR = "SimilarityEvaluator"
|
|
73
86
|
FLUENCY_EVALUATOR = "FluencyEvaluator"
|
|
74
87
|
RETRIEVAL_EVALUATOR = "RetrievalEvaluator"
|
|
88
|
+
TOOL_SUCCESS_EVALUATOR = "_ToolSuccessEvaluator"
|
|
89
|
+
TASK_ADHERENCE_EVALUATOR = "TaskAdherenceEvaluator"
|
|
90
|
+
TASK_COMPLETION_EVALUATOR = "_TaskCompletionEvaluator"
|
|
75
91
|
INDIRECT_ATTACK_EVALUATOR = "IndirectAttackEvaluator"
|
|
76
92
|
INDIRECT_ATTACK_SIMULATOR = "IndirectAttackSimulator"
|
|
77
93
|
ADVERSARIAL_SIMULATOR = "AdversarialSimulator"
|
|
@@ -81,6 +97,13 @@ class ErrorTarget(Enum):
|
|
|
81
97
|
MODELS = "Models"
|
|
82
98
|
UNKNOWN = "Unknown"
|
|
83
99
|
CONVERSATION = "Conversation"
|
|
100
|
+
TOOL_CALL_ACCURACY_EVALUATOR = "ToolCallAccuracyEvaluator"
|
|
101
|
+
TOOL_SELECTION_EVALUATOR = "_ToolSelectionEvaluator"
|
|
102
|
+
TOOL_INPUT_ACCURACY_EVALUATOR = "_ToolInputAccuracyEvaluator"
|
|
103
|
+
RED_TEAM = "RedTeam"
|
|
104
|
+
AOAI_GRADER = "AoaiGrader"
|
|
105
|
+
CONVERSATION_HISTORY_PARSING = "_get_conversation_history"
|
|
106
|
+
TOOL_OUTPUT_UTILIZATION_EVALUATOR = "ToolOutputUtilizationEvaluator"
|
|
84
107
|
|
|
85
108
|
|
|
86
109
|
class EvaluationException(AzureError):
|
|
@@ -7,7 +7,7 @@ from typing import Any, Dict, MutableMapping, Optional, TypedDict, cast
|
|
|
7
7
|
|
|
8
8
|
from typing_extensions import Self, Unpack
|
|
9
9
|
|
|
10
|
-
from azure.ai.evaluation._user_agent import
|
|
10
|
+
from azure.ai.evaluation._user_agent import UserAgentSingleton
|
|
11
11
|
from azure.core.configuration import Configuration
|
|
12
12
|
from azure.core.pipeline import AsyncPipeline, Pipeline
|
|
13
13
|
from azure.core.pipeline.policies import (
|
|
@@ -448,19 +448,21 @@ class AsyncHttpPipeline(AsyncPipeline):
|
|
|
448
448
|
return cast(Self, await super().__aenter__())
|
|
449
449
|
|
|
450
450
|
|
|
451
|
-
def get_http_client() -> HttpPipeline:
|
|
451
|
+
def get_http_client(**kwargs: Any) -> HttpPipeline:
    """Build an HttpPipeline, adding a default user-agent policy when the caller supplies none.

    :keyword kwargs: Keyword arguments forwarded to ``HttpPipeline``. When ``user_agent_policy`` is absent,
        a ``UserAgentPolicy`` based on ``UserAgentSingleton().value`` is inserted before forwarding.
    :returns: An HttpPipeline constructed from the resulting keyword arguments.
    :rtype: HttpPipeline
    """
    # Constructed unconditionally, matching the eager evaluation of a setdefault() argument.
    default_policy = UserAgentPolicy(base_user_agent=UserAgentSingleton().value)
    if "user_agent_policy" not in kwargs:
        kwargs["user_agent_policy"] = default_policy
    return HttpPipeline(**kwargs)
|
|
458
459
|
|
|
459
460
|
|
|
460
|
-
def get_async_http_client() -> AsyncHttpPipeline:
|
|
461
|
+
def get_async_http_client(**kwargs: Any) -> AsyncHttpPipeline:
    """Build an AsyncHttpPipeline, adding a default user-agent policy when the caller supplies none.

    :keyword kwargs: Keyword arguments forwarded to ``AsyncHttpPipeline``. When ``user_agent_policy`` is
        absent, a ``UserAgentPolicy`` based on ``UserAgentSingleton().value`` is inserted before forwarding.
    :returns: An AsyncHttpPipeline constructed from the resulting keyword arguments.
    :rtype: AsyncHttpPipeline
    """
    # Constructed unconditionally, matching the eager evaluation of a setdefault() argument.
    default_policy = UserAgentPolicy(base_user_agent=UserAgentSingleton().value)
    if "user_agent_policy" not in kwargs:
        kwargs["user_agent_policy"] = default_policy
    return AsyncHttpPipeline(**kwargs)
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# NOTE: This contains adapters that make the Promptflow dependency optional. In the first phase,
|
|
6
|
+
# Promptflow will still be installed as part of the azure-ai-evaluation dependencies. This
|
|
7
|
+
# will be removed in the future once the code migration is complete.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from typing import Final
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Probe for the optional Promptflow package; the two flags below record whether the import succeeded.
try:
    from promptflow._constants import FlowType
except ImportError:
    _has_legacy = False
else:
    _has_legacy = True

# Complementary flags so callers can test for either presence or absence.
HAS_LEGACY_SDK: Final[bool] = _has_legacy
MISSING_LEGACY_SDK: Final[bool] = not _has_legacy
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, Final, Optional
|
|
7
|
+
from typing_extensions import TypeAlias
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from promptflow._sdk._configuration import Configuration as _Configuration
|
|
12
|
+
except ImportError:
|
|
13
|
+
_global_config: Final[Dict[str, Any]] = {}
|
|
14
|
+
|
|
15
|
+
class _Configuration:
|
|
16
|
+
TRACE_DESTINATION: Final[str] = "trace.destination"
|
|
17
|
+
_instance = None
|
|
18
|
+
|
|
19
|
+
def __init__(self, *, override_config: Optional[Dict[str, Any]] = None) -> None:
|
|
20
|
+
self._config = override_config or {}
|
|
21
|
+
|
|
22
|
+
@classmethod
|
|
23
|
+
def get_instance(cls) -> "_Configuration":
|
|
24
|
+
"""Use this to get instance to avoid multiple copies of same global config."""
|
|
25
|
+
if cls._instance is None:
|
|
26
|
+
cls._instance = Configuration(override_config=_global_config)
|
|
27
|
+
return cls._instance
|
|
28
|
+
|
|
29
|
+
def set_config(self, key: str, value: Any) -> None:
|
|
30
|
+
# Simulated config storage
|
|
31
|
+
self._config[key] = value
|
|
32
|
+
|
|
33
|
+
def get_config(self, key: str) -> Any:
|
|
34
|
+
# Simulated config storage
|
|
35
|
+
if key in self._config:
|
|
36
|
+
return self._config[key]
|
|
37
|
+
return _global_config.get(key, None)
|
|
38
|
+
|
|
39
|
+
def get_trace_destination(self, path: Optional[Path] = None) -> Optional[str]:
|
|
40
|
+
if path:
|
|
41
|
+
raise NotImplementedError("Setting trace destination with a path is not supported.")
|
|
42
|
+
return self._config.get("trace.destination", None)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
Configuration: TypeAlias = _Configuration
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from typing import Final
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# String constants shared with Promptflow-compatible code paths.
# NOTE(review): the two PF_* values look like environment variable names — confirm against callers.
PF_FLOW_ENTRY_IN_TMP: Final[str] = "PF_FLOW_ENTRY_IN_TMP"
PF_FLOW_META_LOAD_IN_SUBPROCESS: Final[str] = "PF_FLOW_META_LOAD_IN_SUBPROCESS"
# NOTE(review): presumably the key/column name carrying an input line's index — confirm.
LINE_NUMBER: Final[str] = "line_number"
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
from typing_extensions import TypeAlias
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
try:
    from promptflow.core._errors import MissingRequiredPackage as _MissingRequiredPackage
except ImportError:
    from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException

    class _MissingRequiredPackage(EvaluationException):
        """Error signalling that a package needed for the requested operation is absent.

        :param message: A message describing the error. This is the error message the user will see.
        :type message: str
        """

        def __init__(self, message: str, **kwargs: Any):
            # Fill in classification fields only where the caller left them unspecified.
            fallback_fields = (
                ("category", ErrorCategory.MISSING_PACKAGE),
                ("blame", ErrorBlame.SYSTEM_ERROR),
                ("target", ErrorTarget.EVALUATE),
                ("internal_message", "Missing required package."),
            )
            for field_name, fallback in fallback_fields:
                kwargs.setdefault(field_name, fallback)
            super().__init__(message=message, **kwargs)


MissingRequiredPackage: TypeAlias = _MissingRequiredPackage
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from typing_extensions import TypeAlias
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Prefer the Promptflow flow types; when any of them cannot be imported, fall back to the
# in-package AsyncPrompty and to local placeholder classes for FlexFlow and Flow.
try:
    from promptflow.core._flow import AsyncPrompty as _AsyncPrompty
    from promptflow._sdk.entities._flows import FlexFlow as _FlexFlow
    from promptflow._sdk.entities._flows.dag import Flow as _Flow
except ImportError:
    from azure.ai.evaluation._legacy.prompty import AsyncPrompty as _AsyncPrompty

    # Empty placeholder; it carries no behavior of its own.
    class _FlexFlow:
        pass

    # Make the placeholder report the name of the class it stands in for.
    _FlexFlow.__name__ = "FlexFlow"

    # Placeholder declaring only the ``name`` attribute annotation.
    class _Flow:
        name: str

    _Flow.__name__ = "Flow"


# Public names exported by this module, bound to whichever implementation was selected above.
AsyncPrompty: TypeAlias = _AsyncPrompty
FlexFlow: TypeAlias = _FlexFlow
Flow: TypeAlias = _Flow
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from typing import Callable, Final
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
try:
    from promptflow._cli._pf._service import stop_service as _stop_service
except ImportError:

    def _stop_service() -> None:
        """Do nothing; stand-in used when the Promptflow implementation is unavailable."""
        return None


# Public entry point, bound to whichever implementation was selected above.
stop_service: Final[Callable[[], None]] = _stop_service
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from os import PathLike
|
|
6
|
+
from typing import Any, Callable, Dict, Optional, Union
|
|
7
|
+
from typing_extensions import TypeAlias
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from ._errors import MissingRequiredPackage
|
|
12
|
+
from ._configuration import Configuration
|
|
13
|
+
from .entities import Run
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
try:
    from promptflow.client import PFClient as _PFClient
except ImportError:

    class _PFClient:
        """Stand-in client used when ``promptflow.client.PFClient`` cannot be imported.

        Running a flow is refused with ``MissingRequiredPackage``; the query methods return
        empty results.
        """

        def __init__(self, **kwargs):
            # An optional "config" keyword seeds the client's configuration object.
            config_override = kwargs.pop("config", None)
            self._config = Configuration(override_config=config_override)

        def run(
            self,
            flow: Union[str, PathLike, Callable],
            *,
            data: Union[str, PathLike],
            run: Optional[Union[str, Run]] = None,
            column_mapping: Optional[dict] = None,
            variant: Optional[str] = None,
            connections: Optional[dict] = None,
            environment_variables: Optional[dict] = None,
            name: Optional[str] = None,
            display_name: Optional[str] = None,
            tags: Optional[Dict[str, str]] = None,
            resume_from: Optional[Union[str, Run]] = None,
            code: Optional[Union[str, PathLike]] = None,
            init: Optional[dict] = None,
            **kwargs,
        ) -> Run:
            """Always fail: this stand-in cannot execute flows.

            :raises MissingRequiredPackage: Unconditionally.
            """
            raise MissingRequiredPackage("Please install 'promptflow' package to use PFClient")

        def get_details(self, run: Union[str, Run], max_results: int = 100, all_results: bool = False) -> pd.DataFrame:
            """Return an empty DataFrame; the arguments are ignored."""
            empty_frame = pd.DataFrame()
            return empty_frame

        def get_metrics(self, run: Union[str, Run]) -> Dict[str, Any]:
            """Return an empty mapping; the argument is ignored."""
            no_metrics: Dict[str, Any] = {}
            return no_metrics


PFClient: TypeAlias = _PFClient
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from typing_extensions import TypeAlias
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Use the Promptflow ``Run`` entity when importable; otherwise declare a structural
# Protocol listing the attributes below.
try:
    from promptflow._sdk.entities import Run as _Run
except ImportError:
    from typing_extensions import Protocol
    from typing import Any, Dict, Optional
    from datetime import datetime
    from pathlib import Path

    # Structural type only: it declares attribute annotations and defines no behavior.
    class _Run(Protocol):
        name: str
        status: str
        _properties: Dict[str, Any]
        _created_on: datetime
        _end_time: Optional[datetime]
        _experiment_name: Optional[str]
        _output_path: Path


# Public name, bound to whichever definition was selected above.
Run: TypeAlias = _Run
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from typing import Callable, Dict, Final, Optional
|
|
6
|
+
from typing_extensions import TypeAlias
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Prefer the Promptflow tracing helpers; when any of them cannot be imported, fall back to
# the standard-library ThreadPoolExecutor and the in-package batch-engine implementations.
try:
    from promptflow.tracing import ThreadPoolExecutorWithContext as _ThreadPoolExecutorWithContext
    from promptflow.tracing._integrations._openai_injector import (
        inject_openai_api as _inject,
        recover_openai_api as _recover,
    )
    from promptflow.tracing import _start_trace
except ImportError:
    from concurrent.futures import ThreadPoolExecutor as _ThreadPoolExecutorWithContext
    from azure.ai.evaluation._legacy._batch_engine._openai_injector import (
        inject_openai_api as _inject,
        recover_openai_api as _recover,
    )
    from azure.ai.evaluation._legacy._batch_engine._trace import start_trace as _start_trace


# Public names, bound to whichever implementations were selected above.
ThreadPoolExecutorWithContext: TypeAlias = _ThreadPoolExecutorWithContext
inject_openai_api: Final[Callable[[], None]] = _inject
recover_openai_api: Final[Callable[[], None]] = _recover
start_trace: Final = _start_trace
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AttrDict(dict):
    """A dictionary that allows attribute access to its keys."""

    def __getattr__(self, key: str) -> Any:
        """Return the item stored under ``key``.

        Invoked only after normal attribute lookup has failed.

        :param key: The attribute name, used as the dictionary key.
        :returns: The stored value.
        :raises AttributeError: If ``key`` is not in the dictionary.
        """
        try:
            return self[key]
        except KeyError:
            # Attribute protocols (hasattr, three-argument getattr) only treat
            # AttributeError as "missing"; a leaked KeyError would propagate through them.
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {key!r}") from None

    def __setattr__(self, key: str, value: Any) -> None:
        """Store ``value`` as the dictionary item ``key``.

        :param key: The attribute name, used as the dictionary key.
        :param value: The value to store.
        """
        self[key] = value
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from typing import Final, Optional
|
|
6
|
+
from typing_extensions import TypeAlias
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Prefer the Promptflow utilities; when any of them cannot be imported, fall back to the
# in-package async helper and to local placeholders for the other two names.
try:
    from promptflow._utils.user_agent_utils import ClientUserAgentUtil as _ClientUserAgentUtil
    from promptflow._utils.async_utils import async_run_allowing_running_loop as _async_run_allowing_running_loop
    from promptflow._cli._utils import get_workspace_triad_from_local as _get_workspace_triad_from_local
except ImportError:
    from azure.ai.evaluation._legacy._batch_engine._utils_deprecated import (
        async_run_allowing_running_loop as _async_run_allowing_running_loop,
    )
    from azure.ai.evaluation._evaluate._utils import AzureMLWorkspace

    # Placeholder whose only method currently does nothing.
    class _ClientUserAgentUtil:
        @staticmethod
        def append_user_agent(user_agent: Optional[str]):
            # TODO ralphe: implement?
            pass

    # Placeholder returning a workspace built from three empty strings.
    def _get_workspace_triad_from_local() -> AzureMLWorkspace:
        return AzureMLWorkspace("", "", "")


# Public names, bound to whichever implementations were selected above.
ClientUserAgentUtil: TypeAlias = _ClientUserAgentUtil
async_run_allowing_running_loop: Final = _async_run_allowing_running_loop
get_workspace_triad_from_local: Final = _get_workspace_triad_from_local
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# NOTE: This is a direct port of the bare minimum needed for BatchEngine functionality from
|
|
6
|
+
# the original Promptflow code. The goal here is expediency, not elegance. As such
|
|
7
|
+
# parts of this code may be a little "quirky", seem incomplete in places, or contain
|
|
8
|
+
# longer TODO comments than usual. In a future code update, large swaths of this code
|
|
9
|
+
# will be refactored or deleted outright.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from logging import Logger
|
|
7
|
+
|
|
8
|
+
from ..._constants import PF_BATCH_TIMEOUT_SEC_DEFAULT
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class BatchEngineConfig:
    """Settings for one batch of evaluations: the logger plus timeout, concurrency and
    result-handling options. Values are validated right after construction."""

    # The logger to use for logging messages.
    logger: Logger

    # The maximum amount of time to wait for all evaluations in the batch to complete.
    batch_timeout_seconds: int = PF_BATCH_TIMEOUT_SEC_DEFAULT

    # The maximum amount of time to wait for an evaluation to run against a single entry
    # in the data input to complete.
    line_timeout_seconds: int = 600

    # The maximum number of evaluations to run concurrently.
    max_concurrency: int = 10

    # Whether to use asynchronous evaluation.
    use_async: bool = True

    # The default number of results to return if you don't ask for all results.
    default_num_results: int = 100

    # Whether to raise an error if an evaluation fails.
    raise_on_error: bool = True

    def __post_init__(self):
        """Reject a missing logger and any non-positive numeric setting.

        :raises ValueError: If ``logger`` is None or a numeric setting is not greater than 0.
        """
        if self.logger is None:
            raise ValueError("logger cannot be None")

        # Checked in declaration order, so the first offending field determines the error.
        positive_checks = (
            (self.batch_timeout_seconds, "batch_timeout_seconds must be greater than 0"),
            (self.line_timeout_seconds, "line_timeout_seconds must be greater than 0"),
            (self.max_concurrency, "max_concurrency must be greater than 0"),
            (self.default_num_results, "default_num_results must be greater than 0"),
        )
        for current_value, failure_message in positive_checks:
            if current_value <= 0:
                raise ValueError(failure_message)