azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from ..._exceptions import ErrorCategory, ErrorBlame, ErrorTarget, EvaluationException
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BatchEngineError(EvaluationException):
    """Base class for errors caused by, or raised inside, the batch engine.

    Any of ``category``, ``target`` or ``blame`` that the caller does not pass is
    filled in with ``ErrorCategory.FAILED_EXECUTION``, ``ErrorTarget.EVAL_RUN`` and
    ``ErrorBlame.UNKNOWN`` respectively before delegating to the parent class.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Explicitly supplied keyword arguments take precedence over the defaults.
        options = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "target": ErrorTarget.EVAL_RUN,
            "blame": ErrorBlame.UNKNOWN,
            **kwargs,
        }
        super().__init__(message, **options)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class BatchEngineValidationError(BatchEngineError):
    """Error raised when validation fails.

    Unless overridden by the caller, ``category`` is ``ErrorCategory.INVALID_VALUE``
    and ``blame`` is ``ErrorBlame.USER_ERROR``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Explicitly supplied keyword arguments take precedence over the defaults.
        options = {
            "category": ErrorCategory.INVALID_VALUE,
            "blame": ErrorBlame.USER_ERROR,
            **kwargs,
        }
        super().__init__(message, **options)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class BatchEngineTimeoutError(BatchEngineError):
    """Error raised when a batch engine operation times out.

    Unless overridden by the caller, ``category`` is ``ErrorCategory.FAILED_EXECUTION``
    and ``blame`` is ``ErrorBlame.SYSTEM_ERROR``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Explicitly supplied keyword arguments take precedence over the defaults.
        options = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.SYSTEM_ERROR,
            **kwargs,
        }
        super().__init__(message, **options)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class BatchEngineCanceledError(BatchEngineError):
    """Error raised when a batch engine operation is canceled.

    Unless overridden by the caller, ``category`` is ``ErrorCategory.FAILED_EXECUTION``
    and ``blame`` is ``ErrorBlame.USER_ERROR``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Explicitly supplied keyword arguments take precedence over the defaults.
        options = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.USER_ERROR,
            **kwargs,
        }
        super().__init__(message, **options)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class BatchEngineRunFailedError(BatchEngineError):
    """Error raised when a batch engine run fails.

    Unless overridden by the caller, ``category`` is ``ErrorCategory.FAILED_EXECUTION``
    and ``blame`` is ``ErrorBlame.SYSTEM_ERROR``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Explicitly supplied keyword arguments take precedence over the defaults.
        options = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.SYSTEM_ERROR,
            **kwargs,
        }
        super().__init__(message, **options)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class BatchEnginePartialError(BatchEngineError):
    """Error raised when a batch engine run has some successful lines mixed in
    with some failures.

    Unless overridden by the caller, ``category`` is ``ErrorCategory.FAILED_EXECUTION``
    and ``blame`` is ``ErrorBlame.SYSTEM_ERROR``.

    :param message: The error message.
    :type message: str
    """

    def __init__(self, message: str, **kwargs):
        # Explicitly supplied keyword arguments take precedence over the defaults.
        options = {
            "category": ErrorCategory.FAILED_EXECUTION,
            "blame": ErrorBlame.SYSTEM_ERROR,
            **kwargs,
        }
        super().__init__(message, **options)
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# Original source code: promptflow-tracing/promptflow/tracing/_integrations/_openai_injector.py
|
|
6
|
+
|
|
7
|
+
import functools
|
|
8
|
+
import importlib
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
from contextvars import ContextVar
|
|
12
|
+
from typing import Any, Callable, Final, Generator, Optional, Protocol, Sequence, Tuple
|
|
13
|
+
|
|
14
|
+
from azure.ai.evaluation._legacy._adapters._errors import MissingRequiredPackage
|
|
15
|
+
from azure.ai.evaluation._legacy._batch_engine._result import TokenMetrics
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Context-local accumulator that the wrapped OpenAI methods add token counts to.
# NOTE: the default is one TokenMetrics instance created at import time, and the
# wrappers mutate whatever .get() returns in place. Usage recorded in a context
# where no fresh value has been set therefore accumulates on this shared default.
_token_metrics: ContextVar[TokenMetrics] = ContextVar("token_metrics", default=TokenMetrics(0, 0, 0))
|
|
19
|
+
# Name of the attribute set on a wrapper method that holds the method it replaced.
# Its presence marks a method as already patched, and it is what gets restored on recovery.
KEY_ATTR_ORIGINAL: Final[str] = "_original"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class _TokenMetrics(Protocol):
    """Structural type describing the token counters read from a result's ``usage`` object.

    Any object exposing these three integer attributes satisfies the protocol.
    """

    # Number of tokens in the prompt.
    prompt_tokens: int
    # Number of tokens in the completion.
    completion_tokens: int
    # Total number of tokens.
    total_tokens: int
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class _WithUsage(Protocol):
    """Structural type for an OpenAI result object that may carry token usage."""

    # Token usage reported with the result, or None when nothing was reported.
    usage: Optional[_TokenMetrics]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _wrap_openai_api_method(method: Callable, is_async: bool) -> Callable:
|
|
37
|
+
"""Wraps the OpenAI API method to inject logic to run on the result of the call."""
|
|
38
|
+
|
|
39
|
+
def update_usage(result: _WithUsage) -> None:
|
|
40
|
+
if hasattr(result, "usage") and result.usage is not None:
|
|
41
|
+
usage = _token_metrics.get()
|
|
42
|
+
usage.prompt_tokens += result.usage.prompt_tokens
|
|
43
|
+
usage.completion_tokens += result.usage.completion_tokens
|
|
44
|
+
usage.total_tokens += result.usage.total_tokens
|
|
45
|
+
|
|
46
|
+
if is_async:
|
|
47
|
+
|
|
48
|
+
@functools.wraps(method)
|
|
49
|
+
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
50
|
+
result: _WithUsage = await method(*args, **kwargs)
|
|
51
|
+
update_usage(result)
|
|
52
|
+
return result
|
|
53
|
+
|
|
54
|
+
return async_wrapper
|
|
55
|
+
else:
|
|
56
|
+
|
|
57
|
+
@functools.wraps(method)
|
|
58
|
+
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
59
|
+
result: _WithUsage = method(*args, **kwargs)
|
|
60
|
+
update_usage(result)
|
|
61
|
+
return result
|
|
62
|
+
|
|
63
|
+
return sync_wrapper
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _openai_api_list() -> Generator[Tuple[Any, Callable, bool], None, None]:
    """Yield the OpenAI API classes whose ``create`` methods should be patched.

    Entries whose class or method does not exist in the installed ``openai``
    version are skipped silently.

    :return: A generator of ``(class, method, is_async)`` tuples, where ``method`` is
        the attribute currently bound on the class.
    :rtype: Generator[Tuple[Any, Callable, bool], None, None]
    :raises MissingRequiredPackage: If an OpenAI module cannot be imported. The
        original ``ImportError`` is attached as the cause.
    """

    apis: Sequence[Tuple[str, str, str, bool]] = [
        ("openai.resources.chat", "Completions", "create", False),
        ("openai.resources.chat", "AsyncCompletions", "create", True),
        ("openai.resources", "Completions", "create", False),
        ("openai.resources", "AsyncCompletions", "create", True),
        ("openai.resources", "Embeddings", "create", False),
        ("openai.resources", "AsyncEmbeddings", "create", True),
        ("openai.resources", "Responses", "create", False),
        ("openai.resources", "AsyncResponses", "create", True),
    ]

    for module_name, class_name, method_name, is_async in apis:
        try:
            module = importlib.import_module(module_name)
            cls = getattr(module, class_name, None)
            method = None if cls is None else getattr(cls, method_name, None)
        except ImportError as ex:
            # Chain explicitly so the failing module stays visible as the cause.
            raise MissingRequiredPackage(
                "Please install the 'openai' package to use the Azure AI Evaluation SDK"
            ) from ex
        except AttributeError:
            logging.warning(
                "The module '%s' does not have class '%s' or method '%s'", module_name, class_name, method_name
            )
            continue

        if method is None:
            continue

        # Yield outside the try block so that an exception thrown into the paused
        # generator is never mistaken for an import or lookup failure.
        yield cls, method, is_async
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def inject_openai_api():
    """Patch the ``create`` methods of the OpenAI API classes so that token usage
    data is collected on every call.

    Methods that already carry the ``_original`` marker attribute are left alone,
    which makes repeated calls safe.
    """
    for api_class, create_method, runs_async in _openai_api_list():
        if hasattr(create_method, KEY_ATTR_ORIGINAL):
            # Already patched by an earlier call.
            continue

        patched: Callable = _wrap_openai_api_method(create_method, runs_async)
        # Keep the replaced method on the wrapper so it can be restored later.
        setattr(patched, KEY_ATTR_ORIGINAL, create_method)
        setattr(api_class, create_method.__name__, patched)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def recover_openai_api():
    """Undo the patching of the OpenAI ``create`` methods.

    Every method that carries the ``_original`` marker attribute is replaced on its
    class by the method stored in that attribute. Unpatched methods are untouched.
    """
    for api_class, current_method, _ in _openai_api_list():
        if not hasattr(current_method, KEY_ATTR_ORIGINAL):
            continue

        setattr(api_class, current_method.__name__, getattr(current_method, KEY_ATTR_ORIGINAL))
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class CaptureOpenAITokenUsage:
    """Context manager that captures OpenAI token usage recorded while it is active.

    Entering installs a zeroed ``TokenMetrics`` as the current ``_token_metrics``
    context value and returns this manager's own ``TokenMetrics`` object. That
    returned object is only filled in on exit, when the counters gathered in the
    context variable are added to it.

    NOTE: the context variable is not restored to its previous value on exit.
    """

    def __init__(self):
        self._tokens = TokenMetrics(0, 0, 0)

    def __enter__(self) -> TokenMetrics:
        fresh_counters = TokenMetrics(0, 0, 0)
        _token_metrics.set(fresh_counters)
        return self._tokens

    def __exit__(self, exc_type: Optional[Exception], exc_value: Optional[Exception], traceback: Optional[Any]) -> None:
        # Fold whatever the wrapped OpenAI calls recorded into the returned object.
        self._tokens.update(_token_metrics.get())
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
from typing import Any, Mapping, Optional, Sequence
|
|
8
|
+
|
|
9
|
+
from ._status import BatchStatus
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class TokenMetrics:
    """Token counters for a run."""

    prompt_tokens: int
    """Tokens consumed by the prompt."""
    completion_tokens: int
    """Tokens consumed by the completion."""
    total_tokens: int
    """Total tokens consumed."""

    def update(self, other: "TokenMetrics") -> None:
        """Add the counters of ``other`` onto this instance, in place.

        :param other: The token metrics to add.
        :type other: TokenMetrics
        """
        for counter in ("prompt_tokens", "completion_tokens", "total_tokens"):
            setattr(self, counter, getattr(self, counter) + getattr(other, counter))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class BatchRunError:
    """Describes an error that occurred in a batch run."""

    details: str
    """Human readable description of the error."""
    exception: Optional[BaseException]
    """The exception associated with the error, if there is one."""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class BatchRunDetails:
    """The details of a single line in a batch run."""

    id: str
    """Identifier of the line run."""
    status: BatchStatus
    """Status of the line run."""
    result: Optional[Mapping[str, Any]]
    """Result produced by the line run."""
    start_time: Optional[datetime]
    """When the line run started. None if it was never started."""
    end_time: Optional[datetime]
    """When the line run ended. None if it never completed."""
    tokens: TokenMetrics
    """Token metrics of the line run."""
    error: Optional[BatchRunError]
    """Error of the line run. Only set if the status is Failed."""
    index: int
    """Index of the line run."""

    @property
    def duration(self) -> timedelta:
        """Elapsed time of the line run, or zero if either timestamp is missing."""
        if self.start_time is None or self.end_time is None:
            return timedelta(0)
        return self.end_time - self.start_time

    @staticmethod
    def create_id(run_id: str, index: int) -> str:
        """Build the ID of a line run from the run ID and the line index."""
        return "{}_{}".format(run_id, index)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class BatchResult:
    """Outcome of a batch run, including per-line details."""

    status: BatchStatus
    """Overall status of the batch run."""
    total_lines: int
    """Total number of lines in the batch run."""
    failed_lines: int
    """Number of lines that failed in the batch run."""
    start_time: datetime
    """When the batch run started."""
    end_time: datetime
    """When the batch run ended."""
    tokens: TokenMetrics
    """Overall token metrics of the batch run."""
    details: Sequence[BatchRunDetails]
    """Details of each line in the batch run."""
    error: Optional[Exception] = None
    """Error of the batch run. Only set when the status does not indicate success."""

    @property
    def duration(self) -> timedelta:
        """Elapsed time of the batch run.

        :return: ``end_time - start_time``, or a zero ``timedelta`` when either is None.
        :rtype: timedelta
        """
        if self.start_time is None or self.end_time is None:
            return timedelta(0)
        return self.end_time - self.start_time

    @property
    def results(self) -> Sequence[Optional[Mapping[str, Any]]]:
        """Per-line results of the batch run, in the order of ``details``.

        :return: The ``result`` of every entry in ``details``; empty when there are no details.
        :rtype: Sequence[Optional[Mapping[str, Any]]]
        """
        return [line.result for line in self.details] if self.details else []
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
from datetime import datetime, timedelta, timezone
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any, Callable, Mapping, Optional, Sequence
|
|
9
|
+
|
|
10
|
+
from ._utils import normalize_identifier_name
|
|
11
|
+
from ._result import BatchResult
|
|
12
|
+
from ._status import BatchStatus
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RunStatus(Enum):
    """Statuses a run can have."""

    # TODO ralphe: Trim this to just the statuses we need
    # Statuses currently commented out (not defined as members): QUEUED,
    # PROVISIONING, STARTING, CANCEL_REQUESTED, FINALIZING, UNAPPROVED,
    # NOTRESPONDING, PAUSING, PAUSED.
    NOT_STARTED = "NotStarted"
    PREPARING = "Preparing"
    RUNNING = "Running"
    CANCELED = "Canceled"
    COMPLETED = "Completed"
    FAILED = "Failed"

    @staticmethod
    def from_batch_result_status(status: BatchStatus) -> "RunStatus":
        """Translate a batch status into the corresponding run status.

        :param BatchStatus status: The batch status to translate.
        :return: The matching run status; ``RunStatus.FAILED`` when nothing matches.
        :rtype: RunStatus
        """
        # Checked in order with ``==``, mirroring a chain of equality tests.
        translation = (
            (BatchStatus.NotStarted, RunStatus.NOT_STARTED),
            (BatchStatus.Running, RunStatus.RUNNING),
            (BatchStatus.Completed, RunStatus.COMPLETED),
            (BatchStatus.Canceled, RunStatus.CANCELED),
            (BatchStatus.Failed, RunStatus.FAILED),
        )
        for batch_status, run_status in translation:
            if status == batch_status:
                return run_status

        # Any status without an explicit entry is reported as failed.
        return RunStatus.FAILED
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Run:
    """Counterpart of a Promptflow Run
    (promptflow-devkit/promptflow/_sdk/entities/_run.py).

    THIS WILL BE REMOVED IN A FUTURE CODE UPDATE"""

    def __init__(
        self,
        *,
        dynamic_callable: Callable,
        name_prefix: Optional[str],
        inputs: Sequence[Mapping[str, Any]],
        column_mapping: Optional[Mapping[str, str]] = None,
        created_on: Optional[datetime] = None,
        run: Optional["Run"] = None,
    ):
        """Create a run in the NOT_STARTED state.

        :keyword Callable dynamic_callable: The callable stored on the run.
        :keyword Optional[str] name_prefix: Prefix for the generated run name; a UUID is
            used when it is empty or None.
        :keyword Sequence[Mapping[str, Any]] inputs: The inputs stored on the run.
        :keyword Optional[Mapping[str, str]] column_mapping: Optional column mapping.
        :keyword Optional[datetime] created_on: Creation time; defaults to the current UTC time.
        :keyword Optional[Run] run: Optional run exposed through ``previous_run``.
        """
        if not created_on:
            created_on = datetime.now(timezone.utc)

        # Lifecycle bookkeeping.
        self._status: RunStatus = RunStatus.NOT_STARTED
        self._created_on = created_on
        self._start_time: Optional[datetime] = None
        self._end_time: Optional[datetime] = None

        # Caller-supplied configuration and the (initially empty) outcome.
        self.dynamic_callable = dynamic_callable
        self.name = self._generate_run_name(name_prefix, created_on)
        self.inputs = inputs
        self.column_mapping: Optional[Mapping[str, str]] = column_mapping
        self.result: Optional[BatchResult] = None
        self.metrics: Mapping[str, Any] = {}
        self._run = run

        # Attributes that are commented out here (apparently from the Promptflow
        # Run this class mirrors):
        #   _use_remote_flow = False
        #   _from_flex_flow = True
        #   _from_prompty = False
        #   flow = path to pointless flow file
        #   _experiment_name = name of folder containing pointless flow file
        #   _lineage_id = basically equivalent to a hex digest of the SHA256 hash of
        #       f"{uuid.getnode()}/{posix_full_path_to_pointless_folder}"
        #   _output_path = Path("<user_folder>/.promptflow/runs/<self.name>")
        #   _flow_name = name of pointless folder

    @property
    def status(self) -> RunStatus:
        """The current status of the run."""
        return self._status

    @property
    def created_on(self) -> datetime:
        """The creation time of the run."""
        return self._created_on

    @property
    def duration(self) -> Optional[timedelta]:
        """Elapsed time between start and end, or None when either is unset."""
        started, ended = self._start_time, self._end_time
        if started is not None and ended is not None:
            return ended - started
        return None

    @property
    def outputs(self) -> Sequence[Mapping[str, Any]]:
        """Per-line outputs of the run; empty when there is no result yet.

        Falsy per-line results (such as None) are replaced by an empty dict.
        """
        batch = self.result
        if batch is None:
            return []
        return [line if line else {} for line in batch.results]

    @property
    def previous_run(self) -> Optional["Run"]:
        """The run passed in at construction time, if any."""
        return self._run

    @staticmethod
    def _generate_run_name(name_prefix: Optional[str], creation_time: datetime) -> str:
        """Build a normalized run name of the form ``<prefix>_<timestamp>``.

        :param Optional[str] name_prefix: The prefix; a fresh UUID is used when falsy.
        :param datetime creation_time: The time formatted into the name.
        :return: The name after ``normalize_identifier_name`` has been applied.
        :rtype: str
        """
        # The Promptflow code derived the name from the temporary folder that held
        # the temporary flow YAML file, a single entry that told it to look at the
        # passed in dynamic_callable. Example folder name:
        #   azure_ai_evaluation_evaluators_common_base_eval_asyncevaluatorbase_l82059h3
        # Here the passed in name_prefix is used instead, or a UUID (which is equally
        # opaque as what the original code did).
        prefix = name_prefix if name_prefix else str(uuid4())
        stamp = creation_time.strftime("%Y%m%d_%H%M%S_%f")
        return normalize_identifier_name("{}_{}".format(prefix, stamp))
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# Original source:
|
|
6
|
+
# promptflow-devkit/promptflow/_sdk/operations/_local_storage_operations.py
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from contextlib import AbstractContextManager
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Final, Mapping, Optional, Tuple, Union
|
|
13
|
+
|
|
14
|
+
from ._result import BatchResult, TokenMetrics, BatchStatus
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
EVAL_USER_SUBFOLDER: Final[str] = ".evaluation"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AbstractRunLogger(AbstractContextManager):
    """Context-manager interface for the logger of a run."""

    @property
    @abstractmethod
    def file_path(self) -> Path:
        """Return the file path of the logger.

        :return: The file path of the logger.
        :rtype: Path
        """

    @abstractmethod
    def get_logs(self) -> str:
        """Return the logs of the run.

        :return: The logs of the run.
        :rtype: str
        """
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class AbstractRunStorage(ABC):
    """Interface for persisting and loading the data of a run."""

    @property
    @abstractmethod
    def logger(self) -> "AbstractRunLogger":
        """Return the logger of the run.

        :return: The logger of the run.
        :rtype: AbstractRunLogger
        """

    @abstractmethod
    def persist_result(self, result: Optional[BatchResult]) -> None:
        """Persist the results of a batch engine execution (including any errors).

        :param Optional[BatchResult] result: The result to persist.
        """

    @abstractmethod
    def load_exception(self) -> Mapping[str, Any]:
        """Load the exception from the storage.

        If there was no exception, an empty mapping will be returned.

        :return: The exception.
        :rtype: Mapping[str, Any]
        """

    @abstractmethod
    def load_inputs_and_outputs(self) -> Tuple[Mapping[str, Any], BatchResult]:
        """Load the inputs and outputs from the storage.

        :return: The inputs and outputs.
        :rtype: Tuple[Mapping[str, Any], BatchResult]
        """

    @abstractmethod
    def load_metrics(self) -> Mapping[str, Union[int, float, str]]:
        """Load the metrics from the storage.

        :return: The metrics.
        :rtype: Mapping[str, Union[int, float, str]]
        """
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class NoOpRunStorage(AbstractRunStorage):
    """Run storage that persists nothing and loads only empty data."""

    def __init__(self):
        """Create the storage together with its no-op logger."""
        self._logger = NoOpLogger()

    @property
    def logger(self) -> AbstractRunLogger:
        """Return the no-op logger created for this storage."""
        return self._logger

    def persist_result(self, result: Optional[BatchResult]) -> None:
        """Ignore the given result; nothing is persisted."""
        return None

    def load_exception(self) -> Mapping[str, Any]:
        """Return an empty mapping."""
        return {}

    def load_inputs_and_outputs(self) -> Tuple[Mapping[str, Any], BatchResult]:
        """Return empty inputs and an empty, not-started batch result.

        The result uses the current UTC time for both its start and end time.
        """
        timestamp = datetime.now(timezone.utc)
        empty_result = BatchResult(
            status=BatchStatus.NotStarted,
            total_lines=0,
            failed_lines=0,
            start_time=timestamp,
            end_time=timestamp,
            tokens=TokenMetrics(prompt_tokens=0, completion_tokens=0, total_tokens=0),
            details=[],
        )
        return {}, empty_result

    def load_metrics(self) -> Mapping[str, Union[int, float, str]]:
        """Return an empty mapping."""
        return {}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class NoOpLogger(AbstractRunLogger):
    """Run logger that does nothing."""

    @property
    def file_path(self) -> Path:
        """Return the ``EVAL_USER_SUBFOLDER`` path under the user's home directory."""
        return Path.home().joinpath(EVAL_USER_SUBFOLDER)

    def __enter__(self) -> None:
        """Enter the context; does nothing and returns None."""
        return None

    def __exit__(self, *args) -> None:
        """Exit the context; does nothing and returns None."""
        return None

    def get_logs(self) -> str:
        """Return an empty string."""
        return str()
|