azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of azure-ai-evaluation has been flagged for review.
- azure/ai/evaluation/__init__.py +83 -14
- azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/_common/constants.py +124 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +578 -69
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +505 -27
- azure/ai/evaluation/_constants.py +148 -0
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
- azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
- azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
- azure/ai/evaluation/_evaluate/_utils.py +237 -42
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
- azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +427 -29
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
- azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
- azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
- azure/ai/evaluation/_exceptions.py +24 -1
- azure/ai/evaluation/_http_utils.py +7 -5
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +26 -0
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
- azure/ai/evaluation/simulator/_constants.py +1 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
- azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
- azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- azure/ai/evaluation/simulator/_simulator.py +43 -19
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/METADATA +366 -27
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info/licenses}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
azure/ai/evaluation/_legacy/prompty/_prompty.py (new file)

@@ -0,0 +1,430 @@

```python
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

import asyncio
import re

from logging import Logger
from os import PathLike
from pathlib import Path
from typing import Any, AsyncGenerator, Awaitable, Dict, Final, List, Mapping, Optional, Sequence, Tuple, Union, cast

from openai import AsyncAzureOpenAI, AsyncOpenAI, NotGiven, OpenAIError
from openai.lib.azure import AsyncAzureADTokenProvider
from azure.core.credentials import TokenCredential
from azure.core.credentials_async import AsyncTokenCredential

from azure.ai.evaluation._exceptions import ErrorTarget
from azure.ai.evaluation._constants import DefaultOpenEncoding, TokenScope
from azure.ai.evaluation._legacy.prompty._exceptions import (
    InvalidInputError,
    PromptyException,
    MissingRequiredInputError,
    NotSupportedError,
    WrappedOpenAIError,
)
from azure.ai.evaluation._legacy.prompty._connection import AzureOpenAIConnection, Connection, OpenAIConnection
from azure.ai.evaluation._legacy.prompty._yaml_utils import load_yaml_string
from azure.ai.evaluation._legacy.prompty._utils import (
    dataclass_from_dict,
    PromptyModelConfiguration,
    OpenAIChatResponseType,
    build_messages,
    format_llm_response,
    openai_error_retryable,
    prepare_open_ai_request_params,
    resolve_references,
    update_dict_recursively,
)
from azure.ai.evaluation._constants import DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS
from azure.ai.evaluation._legacy._common._logging import get_logger
from azure.ai.evaluation._legacy._common._async_token_provider import AsyncAzureTokenProvider
from azure.ai.evaluation._user_agent import UserAgentSingleton

PROMPTY_EXTENSION: Final[str] = ".prompty"


class AsyncPrompty:
    """A prompty is a prompt with predefined metadata like inputs, and can be executed directly like a flow.
    A prompty is represented as a templated markdown file with a modified front matter.
    The front matter is a yaml file that contains meta fields like model configuration, inputs, etc..

    Prompty example:
    .. code-block::

        ---
        name: Hello Prompty
        description: A basic prompt
        model:
            api: chat
            configuration:
                type: azure_openai
                azure_deployment: gpt-35-turbo
                api_key="${env:AZURE_OPENAI_API_KEY}",
                api_version=${env:AZURE_OPENAI_API_VERSION}",
                azure_endpoint="${env:AZURE_OPENAI_ENDPOINT}",
            parameters:
                max_tokens: 128
                temperature: 0.2
        inputs:
            text:
                type: string
        ---
        system:
        Write a simple {{text}} program that displays the greeting message.

    Prompty as function example:

    .. code-block:: python

        from azure.ai.evaluation._legacy.prompty import AsyncPrompty
        prompty = Prompty(path="path/to/prompty.prompty")
        result = prompty(input_a=1, input_b=2)

        # Override model config with dict
        model_config = {
            "api": "chat",
            "configuration": {
                "type": "azure_openai",
                "azure_deployment": "gpt-35-turbo",
                "api_key": "${env:AZURE_OPENAI_API_KEY}",
                "api_version": "${env:AZURE_OPENAI_API_VERSION}",
                "azure_endpoint": "${env:AZURE_OPENAI_ENDPOINT}",
            },
            "parameters": {
                "max_token": 512
            }
        }
        prompty = Prompty.load(source="path/to/prompty.prompty", model=model_config)
        result = prompty(input_a=1, input_b=2)

        # Override model config with configuration
        from azure.ai.evaluation._legacy.prompty._connection import AzureOpenAIConnection
        model_config = {
            "api": "chat",
            "configuration": AzureOpenAIModelConfiguration(
                azure_deployment="gpt-35-turbo",
                api_key="${env:AZURE_OPENAI_API_KEY}",
                api_version="${env:AZURE_OPENAI_API_VERSION}",
                azure_endpoint="${env:AZURE_OPENAI_ENDPOINT}",
            ),
            "parameters": {
                "max_token": 512
            }
        }
        prompty = Prompty(path="path/to/prompty.prompty", model=model_config)
        result = prompty(input_a=1, input_b=2)

        # Override model config with created connection
        from azure.ai.evaluation._legacy.prompty._connection import AzureOpenAIConnection
        model_config = {
            "api": "chat",
            "configuration": AzureOpenAIModelConfiguration(
                connection="azure_open_ai_connection",
                azure_deployment="gpt-35-turbo",
            ),
            "parameters": {
                "max_token": 512
            }
        }
        prompty = Prompty(path="path/to/prompty.prompty", model=model_config)
        result = prompty(input_a=1, input_b=2)
    """

    def __init__(
        self,
        path: Union[str, PathLike],
        *,
        logger: Optional[Logger] = None,
        token_credential: Optional[Union[TokenCredential, AsyncTokenCredential]] = None,
        is_reasoning_model: bool = False,
        **kwargs: Any,
    ):
        path = Path(path)
        configs, self._template = self._parse_prompty(path)

        if is_reasoning_model:
            parameters = configs.get("model", {}).get("parameters", {})
            if "max_tokens" in parameters:
                parameters.pop("max_tokens", None)
                parameters["max_completion_tokens"] = DEFAULT_MAX_COMPLETION_TOKENS_REASONING_MODELS
            # Remove unsupported parameters for reasoning models
            for key in ["temperature", "top_p", "presence_penalty", "frequency_penalty"]:
                parameters.pop(key, None)

        configs = resolve_references(configs, base_path=path.parent)
        configs = update_dict_recursively(configs, resolve_references(kwargs, base_path=path.parent))

        if configs["model"].get("api") == "completion":
            raise InvalidInputError(
                "Prompty does not support the completion API. Please use the 'chat' completions API instead."
            )

        self._data = configs
        self._path = path
        self._model = dataclass_from_dict(PromptyModelConfiguration, configs["model"])
        self._inputs: Dict[str, Any] = configs.get("inputs", {})
        self._outputs: Dict[str, Any] = configs.get("outputs", {})
        self._name: str = configs.get("name", path.stem)
        self._logger = logger or get_logger(__name__)
        self._token_credential: Union[TokenCredential, AsyncTokenCredential] = (
            token_credential or AsyncAzureTokenProvider()
        )

    @property
    def path(self) -> Path:
        """Path of the prompty file.

        :return: The path of the prompty file.
        :rtype: Path
        """
        return self._path

    @property
    def name(self) -> str:
        """Name of the prompty.

        :return: The name of the prompty.
        :rtype: str
        """
        return self._name

    @property
    def description(self) -> Optional[str]:
        """Description of the prompty.

        :return: The description of the prompty.
        :rtype: str
        """
        return self._data.get("description")

    @classmethod
    def load(
        cls,
        source: Union[str, PathLike],
        **kwargs,
    ) -> "AsyncPrompty":
        """
        Loads the prompty file.

        :param source: The local prompty file. Must be a path to a local file.
            An exception is raised if the file does not exist.
        :type source: Union[PathLike, str]
        :return: A Prompty object
        :rtype: Prompty
        """
        source_path = Path(source)
        if not source_path.exists():
            raise PromptyException(f"Source {source_path.absolute().as_posix()} does not exist")

        if source_path.suffix != PROMPTY_EXTENSION:
            raise PromptyException("Source must be a file with .prompty extension.")

        return cls(path=source_path, **kwargs)

    @staticmethod
    def _parse_prompty(path) -> Tuple[Dict[str, Any], str]:
        with open(path, "r", encoding=DefaultOpenEncoding.READ) as f:
            prompty_content = f.read()
        pattern = r"-{3,}\n(.*)-{3,}\n(.*)"
        result = re.search(pattern, prompty_content, re.DOTALL)
        if not result:
            raise PromptyException(
                "Illegal formatting of prompty. The prompt file is in markdown format and can be divided into two "
                "parts, the first part is in YAML format and contains connection and model information. The second "
                "part is the prompt template."
            )
        config_content, prompt_template = result.groups()
        configs = load_yaml_string(config_content)
        return configs, prompt_template

    def _resolve_inputs(self, input_values: Dict[str, Any]) -> Mapping[str, Any]:
        """
        Resolve prompty inputs. If not provide input_values, sample data will be regarded as input value.
        For inputs are not provided, the default value in the input signature will be used.

        :param Dict[str, Any] input_values: The input values provided by the user.
        :return: The resolved inputs.
        :rtype: Mapping[str, Any]
        """

        resolved_inputs: Dict[str, Any] = {}
        missing_inputs: List[str] = []
        for input_name, value in self._inputs.items():
            if input_name not in input_values and "default" not in value:
                missing_inputs.append(input_name)
                continue

            resolved_inputs[input_name] = input_values.get(input_name, value.get("default", None))

        if missing_inputs:
            raise MissingRequiredInputError(f"Missing required inputs: {missing_inputs}")

        return resolved_inputs

    async def __call__(  # pylint: disable=docstring-keyword-should-match-keyword-only
        self,
        **kwargs: Any,
    ) -> dict:
        """Calling prompty as a function in async, the inputs should be provided with key word arguments.
        Returns the output of the prompty.

        The function call throws PromptyException if the Prompty file is not valid or the inputs are not valid.

        :keyword kwargs: Additional keyword arguments passed to the parent class.
        :paramtype kwargs: Any
        :return: The output of the prompty.
        :rtype: ChatCompletion | AsyncStream[ChatCompletionChunk] | AsyncGenerator[str] | str | Mapping[str, Any]
        """

        inputs = self._resolve_inputs(kwargs)
        connection = Connection.parse_from_config(self._model.configuration)
        messages = build_messages(prompt=self._template, working_dir=self.path.parent, **inputs)
        params = prepare_open_ai_request_params(self._model, messages)

        timeout: Optional[float] = None
        if timeout_val := cast(Any, kwargs.get("timeout", None)):
            timeout = float(timeout_val)

        # disable OpenAI's built-in retry mechanism by using our own retry
        # for better debugging and real-time status updates.
        max_retries = 0

        default_headers = {"User-Agent": UserAgentSingleton().value}

        api_client: Union[AsyncAzureOpenAI, AsyncOpenAI]
        if isinstance(connection, AzureOpenAIConnection):
            api_client = AsyncAzureOpenAI(
                azure_endpoint=connection.azure_endpoint,
                api_key=connection.api_key,
                azure_deployment=connection.azure_deployment,
                api_version=connection.api_version,
                max_retries=max_retries,
                azure_ad_token_provider=(
                    self.get_token_provider(self._token_credential) if not connection.api_key else None
                ),
                default_headers=default_headers,
            )
        elif isinstance(connection, OpenAIConnection):
            api_client = AsyncOpenAI(
                base_url=connection.base_url,
                api_key=connection.api_key,
                organization=connection.organization,
                max_retries=max_retries,
                default_headers=default_headers,
            )
        else:
            raise NotSupportedError(
                f"'{type(connection).__name__}' is not a supported connection type.", target=ErrorTarget.EVAL_RUN
            )

        response: OpenAIChatResponseType = await self._send_with_retries(
            api_client=api_client,
            params=params,
            timeout=timeout,
        )

        return await format_llm_response(
            response=response,
            is_first_choice=self._data.get("model", {}).get("response", "first").lower() == "first",
            response_format=params.get("response_format", {}),
            outputs=self._outputs,
            inputs=inputs,
        )

    def render(  # pylint: disable=docstring-keyword-should-match-keyword-only
        self, **kwargs: Any
    ) -> Sequence[Mapping[str, Any]]:
        """Render the prompt content.

        :keyword kwargs: Additional keyword arguments passed to the parent class.
        :paramtype kwargs: Any
        :return: Prompt content
        :rtype: Sequence[Mapping[str, Any]]
        """

        inputs = self._resolve_inputs(kwargs)
        messages = build_messages(prompt=self._template, working_dir=self.path.parent, **inputs)
        return messages

    async def _send_with_retries(
        self,
        api_client: Union[AsyncAzureOpenAI, AsyncOpenAI],
        params: Mapping[str, Any],
        timeout: Optional[float],
        max_retries: int = 10,
        max_entity_retries: int = 3,
    ) -> OpenAIChatResponseType:
        """Send the request with retries.

        :param Union[AsyncAzureOpenAI, AsyncOpenAI] api_client: The OpenAI client.
        :param Mapping[str, Any] params: The request parameters.
        :param Optional[float] timeout: The timeout for the request.
        :param int max_retries: The maximum number of retries.
        :param int max_entity_retries: The maximum number of retries for entity errors.
        :return: The response from OpenAI.
        :rtype: OpenAIChatResponseType
        """

        client_name: str = api_client.__class__.__name__
        client: Union[AsyncAzureOpenAI, AsyncOpenAI] = api_client.with_options(timeout=timeout or NotGiven())

        entity_retries: List[int] = [0]
        should_retry: bool = True
        retry: int = 0
        delay: Optional[float] = None

        while should_retry:
            try:
                if delay:
                    await asyncio.sleep(delay)

                response = await client.chat.completions.create(**params)
                return response
            except OpenAIError as error:
                if retry >= max_retries:
                    should_retry = False
                else:
                    should_retry, delay = openai_error_retryable(error, retry, entity_retries, max_entity_retries)

                if should_retry:
                    self._logger.warning(
                        "[%d/%d] %s request failed. %s: %s. Retrying in %f seconds.",
                        retry,
                        max_retries,
                        client_name,
                        type(error).__name__,
                        str(error),
                        delay or 0.0,
                        exc_info=True,
                    )
                else:
                    self._logger.exception(
                        "[%d/%d] %s request failed. %s: %s",
                        retry,
                        max_retries,
                        client_name,
                        type(error).__name__,
                        str(error),
                    )
                    raise WrappedOpenAIError(error=error) from error

            retry += 1

    @staticmethod
    def get_token_provider(cred: Union[TokenCredential, AsyncTokenCredential]) -> AsyncAzureADTokenProvider:
        """Get the token provider for the prompty.

        :param Union[TokenCredential, AsyncTokenCredential] cred: The Azure authentication credential.
        :return: The token provider if a credential is provided, otherwise None.
        :rtype: Optional[AsyncAzureADTokenProvider]
        """

        async def _wrapper() -> str:
            token = cred.get_token(TokenScope.COGNITIVE_SERVICES_MANAGEMENT)
            if isinstance(token, Awaitable):
                token = await token
            return token.token

        return _wrapper
```
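
For context, here is a minimal sketch of how the `AsyncPrompty` class added in this hunk might be driven, following the usage shown in its own docstring. The `.prompty` file path and the `text` input name are placeholders, not part of the package.

```python
import asyncio

# The import path follows the class's own docstring and the added
# azure/ai/evaluation/_legacy/prompty/__init__.py in this diff.
from azure.ai.evaluation._legacy.prompty import AsyncPrompty


async def main() -> None:
    # Load a local .prompty file (hypothetical path); load() validates the
    # path and the .prompty extension before constructing the object.
    prompty = AsyncPrompty.load(source="path/to/hello.prompty")

    # Inputs are passed as keyword arguments; __call__ is async, so it must
    # be awaited. The formatted LLM response is returned.
    result = await prompty(text="Python")
    print(result)


asyncio.run(main())
```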