azure-ai-evaluation 1.0.0b2__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +100 -5
- azure/ai/evaluation/{_evaluators/_chat → _aoai}/__init__.py +3 -2
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/{simulator/_helpers → _common}/_experimental.py +24 -9
- azure/ai/evaluation/_common/constants.py +131 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/math.py +89 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +831 -142
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +870 -34
- azure/ai/evaluation/_constants.py +167 -6
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +83 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +17 -0
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/{_batch_run_client → _batch_run}/code_client.py +47 -25
- azure/ai/evaluation/_evaluate/{_batch_run_client/batch_run_context.py → _batch_run/eval_run_context.py} +42 -13
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +124 -0
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +62 -0
- azure/ai/evaluation/_evaluate/_eval_run.py +102 -59
- azure/ai/evaluation/_evaluate/_evaluate.py +2134 -311
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +14 -99
- azure/ai/evaluation/_evaluate/_utils.py +289 -40
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +93 -42
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +117 -91
- azure/ai/evaluation/_evaluators/_coherence/coherence.prompty +76 -39
- azure/ai/evaluation/_evaluators/_common/__init__.py +15 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +742 -0
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +345 -0
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +198 -0
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/__init__.py +0 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +144 -86
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +138 -57
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +123 -55
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +133 -54
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +134 -54
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +49 -56
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +102 -60
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +115 -92
- azure/ai/evaluation/_evaluators/_fluency/fluency.prompty +66 -41
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +90 -37
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +318 -82
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +114 -0
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +104 -0
- azure/ai/evaluation/{_evaluate/_batch_run_client → _evaluators/_intent_resolution}/__init__.py +3 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +107 -61
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +104 -77
- azure/ai/evaluation/_evaluators/_qa/_qa.py +115 -63
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +182 -98
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +178 -49
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/{_chat/retrieval → _retrieval}/__init__.py +2 -2
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +148 -0
- azure/ai/evaluation/_evaluators/_retrieval/retrieval.prompty +93 -0
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +189 -50
- azure/ai/evaluation/_evaluators/_service_groundedness/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +179 -0
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +102 -91
- azure/ai/evaluation/_evaluators/_similarity/similarity.prompty +0 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_tool_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py +301 -0
- azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +109 -107
- azure/ai/evaluation/_exceptions.py +51 -7
- azure/ai/evaluation/_http_utils.py +210 -137
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +130 -8
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/__init__.py +3 -0
- azure/ai/evaluation/_vendor/rouge_score/__init__.py +14 -0
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +324 -0
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +59 -0
- azure/ai/evaluation/_vendor/rouge_score/tokenizers.py +53 -0
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/__init__.py +2 -1
- azure/ai/evaluation/simulator/_adversarial_scenario.py +26 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +270 -144
- azure/ai/evaluation/simulator/_constants.py +12 -1
- azure/ai/evaluation/simulator/_conversation/__init__.py +151 -23
- azure/ai/evaluation/simulator/_conversation/_conversation.py +10 -6
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_data_sources/__init__.py +3 -0
- azure/ai/evaluation/simulator/_data_sources/grounding.json +1150 -0
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +54 -75
- azure/ai/evaluation/simulator/_helpers/__init__.py +1 -2
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_helpers/_simulator_data_classes.py +26 -5
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +145 -104
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +80 -30
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +117 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +109 -7
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +97 -33
- azure/ai/evaluation/simulator/_model_tools/models.py +30 -27
- azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +6 -10
- azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +6 -5
- azure/ai/evaluation/simulator/_simulator.py +302 -208
- azure/ai/evaluation/simulator/_utils.py +31 -13
- azure_ai_evaluation-1.13.3.dist-info/METADATA +939 -0
- azure_ai_evaluation-1.13.3.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/WHEEL +1 -1
- azure_ai_evaluation-1.13.3.dist-info/licenses/NOTICE.txt +70 -0
- azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py +0 -71
- azure/ai/evaluation/_evaluators/_chat/_chat.py +0 -357
- azure/ai/evaluation/_evaluators/_chat/retrieval/_retrieval.py +0 -157
- azure/ai/evaluation/_evaluators/_chat/retrieval/retrieval.prompty +0 -48
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_base.py +0 -65
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py +0 -301
- azure/ai/evaluation/_evaluators/_groundedness/groundedness.prompty +0 -54
- azure/ai/evaluation/_evaluators/_protected_materials/__init__.py +0 -5
- azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py +0 -104
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.0b2.dist-info/METADATA +0 -449
- azure_ai_evaluation-1.0.0b2.dist-info/RECORD +0 -99
- {azure_ai_evaluation-1.0.0b2.dist-info → azure_ai_evaluation-1.13.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import Any, AsyncContextManager, Optional
|
|
7
|
+
|
|
8
|
+
from azure.core.credentials import AccessToken, TokenCredential
|
|
9
|
+
from azure.identity import AzureCliCredential, DefaultAzureCredential, ManagedIdentityCredential
|
|
10
|
+
|
|
11
|
+
from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
|
|
12
|
+
from azure.ai.evaluation._azure._envs import AzureEnvironmentClient
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AsyncAzureTokenProvider(AsyncContextManager["AsyncAzureTokenProvider"]):
    """Asynchronous token provider for Azure services that supports non-default Azure clouds
    (e.g. Azure China, Azure US Government, etc.).

    The credential is selected on first use (or when entering ``async with``) by
    ``_initialize_async`` and is cached until ``close`` is called.
    """

    def __init__(self, *, base_url: Optional[str] = None, **kwargs: Any) -> None:
        """Initialize the AsyncAzureTokenProvider.

        :keyword base_url: Passed through to ``AzureEnvironmentClient``.
        :paramtype base_url: Optional[str]
        :keyword kwargs: Additional keyword arguments passed through to ``AzureEnvironmentClient``.
        """
        self._credential: Optional[TokenCredential] = None
        self._env_client: Optional[AzureEnvironmentClient] = AzureEnvironmentClient(base_url=base_url, **kwargs)

    async def close(self) -> None:
        """Close the environment client (if still open) and drop the cached credential.

        After this call the provider can no longer resolve a credential; ``get_token``
        raises an ``EvaluationException``.
        """
        if self._env_client:
            await self._env_client.close()
            self._env_client = None

        self._credential = None

    async def get_token(
        self,
        *scopes: str,
        claims: Optional[str] = None,
        tenant_id: Optional[str] = None,
        enable_cae: bool = False,
        **kwargs: Any,
    ) -> AccessToken:
        """Request an access token for ``scopes`` from the selected credential.

        The credential is resolved on the first call and reused afterwards. All
        arguments are forwarded unchanged to the credential's ``get_token``.

        :param str scopes: Scopes the token is requested for.
        :keyword claims: Forwarded to the credential.
        :paramtype claims: Optional[str]
        :keyword tenant_id: Forwarded to the credential.
        :paramtype tenant_id: Optional[str]
        :keyword enable_cae: Forwarded to the credential.
        :paramtype enable_cae: bool
        :return: The token returned by the credential.
        :rtype: AccessToken
        :raises EvaluationException: If the provider has already been closed, or no
            credential could be determined.
        """
        if self._credential is None:
            self._credential = await self._initialize_async(self._env_client)

        # Defensive: _initialize_async either returns a credential or raises.
        if self._credential is None:
            raise EvaluationException(
                f"{self.__class__.__name__} could not determine the credential to use.",
                target=ErrorTarget.UNKNOWN,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.SYSTEM_ERROR,
            )

        # NOTE(review): the credential is a synchronous TokenCredential and the call is not
        # awaited, so token acquisition presumably blocks the event loop -- confirm acceptable.
        return self._credential.get_token(*scopes, claims=claims, tenant_id=tenant_id, enable_cae=enable_cae, **kwargs)

    async def __aenter__(self) -> "AsyncAzureTokenProvider":
        """Resolve the credential eagerly and return this provider."""
        self._credential = await self._initialize_async(self._env_client)
        return self

    async def __aexit__(
        self,
        exc_type: Optional[type] = None,
        exc_value: Optional[BaseException] = None,
        traceback: Optional[Any] = None,
    ) -> None:
        """Close the provider; exceptions from the ``async with`` body are not suppressed."""
        await self.close()

    @staticmethod
    async def _initialize_async(client: Optional[AzureEnvironmentClient]) -> TokenCredential:
        """Pick the credential to use for the configured Azure cloud and environment.

        The first matching rule wins:

        1. A non-default cloud name: ``DefaultAzureCredential`` bound to that cloud's
           ``active_directory_endpoint`` authority.
        2. ``AZUREML_OBO_ENABLED`` is set (any non-empty value): ``AzureMLOnBehalfOfCredential``.
        3. ``PF_USE_AZURE_CLI_CREDENTIAL`` equals ``"true"`` (case-insensitive): ``AzureCliCredential``.
        4. ``IS_IN_CI_PIPELINE`` equals ``"true"`` (case-insensitive): ``AzureCliCredential``.
        5. ``DEFAULT_IDENTITY_CLIENT_ID`` is set: ``ManagedIdentityCredential`` for that client id.
        6. Otherwise: ``DefaultAzureCredential``.

        :param client: Environment client used to look up the cloud name and metadata.
        :type client: Optional[AzureEnvironmentClient]
        :return: The selected credential.
        :rtype: TokenCredential
        :raises EvaluationException: If ``client`` is ``None`` (provider already closed), the
            cloud metadata cannot be retrieved, or the OBO credential package is missing.
        """
        # Determine which credential to use based on the configured Azure cloud environment variables
        # and possibly making network calls to Azure to get the correct Azure cloud metadata.
        if client is None:
            raise EvaluationException(
                f"{AsyncAzureTokenProvider.__name__} instance has already been closed.",
                target=ErrorTarget.UNKNOWN,
                category=ErrorCategory.INVALID_VALUE,
                blame=ErrorBlame.USER_ERROR,
            )

        cloud_name: str = await client.get_default_cloud_name_async()
        if cloud_name != client.DEFAULT_AZURE_CLOUD_NAME:
            # If the cloud name is not the default, we need to get the metadata for the specified cloud
            # and authenticate against that cloud's authority.
            metadata = await client.get_cloud_async(cloud_name)
            if metadata is None:
                raise EvaluationException(
                    f"Failed to get metadata for cloud '{cloud_name}'.",
                    target=ErrorTarget.UNKNOWN,
                    category=ErrorCategory.INVALID_VALUE,
                    blame=ErrorBlame.USER_ERROR,
                )

            authority = metadata.get("active_directory_endpoint")
            return DefaultAzureCredential(authority=authority, exclude_shared_token_cache_credential=True)
        elif os.getenv("AZUREML_OBO_ENABLED"):
            # using Azure on behalf of credentials requires the use of the azure-ai-ml package
            try:
                from azure.ai.ml.identity import AzureMLOnBehalfOfCredential

                return AzureMLOnBehalfOfCredential()  # type: ignore
            except ImportError as ex:  # ModuleNotFoundError is a subclass of ImportError
                # Chain the original error so the failing import stays visible in the traceback.
                raise EvaluationException(
                    message=(
                        "The required packages for OBO credentials are missing.\n"
                        'To resolve this, please install them by running "pip install azure-ai-ml".'
                    ),
                    target=ErrorTarget.EVALUATE,
                    category=ErrorCategory.MISSING_PACKAGE,
                    blame=ErrorBlame.USER_ERROR,
                ) from ex
        elif os.environ.get("PF_USE_AZURE_CLI_CREDENTIAL", "false").lower() == "true":
            # TODO ralphe: Is this still needed? DefaultAzureCredential already includes CLI credentials
            #              albeit with a lower priority
            return AzureCliCredential()
        elif os.environ.get("IS_IN_CI_PIPELINE", "false").lower() == "true":
            # CI pipeline runs authenticate with the Azure CLI credential (not a managed identity).
            return AzureCliCredential()
        elif identity_client_id := os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"):
            return ManagedIdentityCredential(client_id=identity_client_id)
        else:
            return DefaultAzureCredential()
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# Original source:
|
|
6
|
+
# - promptflow-core/promptflow/_core/log_manager.py
|
|
7
|
+
# - promptflow-core/promptflow/_utils/logger_utils.py
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import logging
|
|
11
|
+
import re
|
|
12
|
+
import sys
|
|
13
|
+
from re import Pattern
|
|
14
|
+
from contextvars import ContextVar
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from io import StringIO, TextIOBase
|
|
18
|
+
from typing import Any, Dict, Final, Mapping, Optional, Set, TextIO, Tuple, Union
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Level names accepted from the PF_LOGGING_LEVEL environment variable.
valid_logging_level: Final[Set[str]] = {"CRITICAL", "FATAL", "ERROR", "WARN", "WARNING", "INFO", "DEBUG", "NOTSET"}


def get_pf_logging_level(default=logging.INFO):
    """Return the logging level configured through ``PF_LOGGING_LEVEL``.

    The environment value is matched against ``valid_logging_level`` ignoring case and
    surrounding whitespace, so ``debug`` and `` DEBUG `` both select ``"DEBUG"``.

    :param default: Value returned when the variable is unset or not a recognised level name.
    :return: The upper-case level name from the environment, or ``default``.
    """
    configured = os.environ.get("PF_LOGGING_LEVEL")
    if configured is not None:
        normalized = configured.strip().upper()
        if normalized in valid_logging_level:
            return normalized
    # Unset or unrecognised: fall back to the caller-supplied default.
    return default
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _get_format_for_logger(
    default_log_format: Optional[str] = None, default_date_format: Optional[str] = None
) -> Tuple[str, str]:
    """Resolve the record format and date format used for log output.

    Each value comes from the first non-empty source, in this order: the environment
    variable (``PF_LOG_FORMAT`` / ``PF_LOG_DATETIME_FORMAT``), the matching argument,
    then a built-in default.

    :param default_log_format: Record format used when ``PF_LOG_FORMAT`` is unset or empty.
    :param default_date_format: Date format used when ``PF_LOG_DATETIME_FORMAT`` is unset or empty.
    :return: ``(log_format, datetime_format)``.
    """
    log_format = os.environ.get("PF_LOG_FORMAT")
    if not log_format:
        log_format = default_log_format or "%(asctime)s %(thread)7d %(name)-18s %(levelname)-8s %(message)s"

    datetime_format = os.environ.get("PF_LOG_DATETIME_FORMAT")
    if not datetime_format:
        datetime_format = default_date_format or "%Y-%m-%d %H:%M:%S %z"

    return log_format, datetime_format
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_logger(name: str) -> logging.Logger:
    """Create a logger called *name* that writes formatted records to stdout.

    A fresh ``logging.Logger`` is instantiated directly on every call rather than being
    looked up with ``logging.getLogger``. Its level comes from ``get_pf_logging_level()``
    and its single stream handler uses the formats from ``_get_format_for_logger()``.

    :param name: Name given to the new logger.
    :return: The configured logger.
    """
    execution_logger = logging.Logger(name)
    execution_logger.setLevel(get_pf_logging_level())

    # The handler binds to whatever sys.stdout is at the time of this call.
    console_handler = logging.StreamHandler(sys.stdout)
    record_format, date_format = _get_format_for_logger()
    # TODO ralphe: Do we need a credentials scrubber here like the old code had? We are not
    #              logging anything that sensitive here.
    console_handler.setFormatter(logging.Formatter(fmt=record_format, datefmt=date_format))

    execution_logger.addHandler(console_handler)
    return execution_logger
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def scrub_credentials(s: str):
    """Return *s* with credential values masked.

    This is a thin wrapper over ``CredentialScrubber.scrub``. For example, the input
    "print accountkey=accountKey" is returned as "print accountkey=**data_scrubbed**".

    :param s: Text that may contain credentials.
    :return: The text with matched credential values replaced.
    """
    scrubbed = CredentialScrubber.scrub(s)
    return scrubbed
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class CredentialScrubber:
    """Mask credential values that appear in free-form text."""

    # Text substituted for every matched credential value.
    PLACE_HOLDER = "**data_scrubbed**"
    # Not referenced anywhere inside this class.
    LENGTH_THRESHOLD = 2
    # Each pattern matches the run of characters (up to whitespace, ";" or "&") that
    # directly follows "sig=" or "key=", case-insensitively.
    DEFAULT_REGEX_SET: Final[Set[Pattern[str]]] = {
        re.compile(expression, flags=re.IGNORECASE)
        for expression in (
            r"(?<=sig=)[^\s;&]+",  # Signature value.
            r"(?<=key=)[^\s;&]+",  # Key value.
        )
    }

    @staticmethod
    def scrub(input: str) -> str:
        """Return *input* with every matched credential value replaced by ``PLACE_HOLDER``.

        For example, "print accountkey=accountKey" becomes
        "print accountkey=**data_scrubbed**".

        :param input: Text that may contain credentials.
        :return: The scrubbed text.
        """
        scrubbed = input
        for pattern in CredentialScrubber.DEFAULT_REGEX_SET:
            scrubbed = pattern.sub(CredentialScrubber.PLACE_HOLDER, scrubbed)
        return scrubbed
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# flow_logger: intended for the detailed executor and runtime logs that are
# only surfaced in flow mode.
flow_logger = get_logger("execution.flow")

# bulk_logger: intended for overall progress and error logs that are only
# surfaced in bulktest and eval modes.
bulk_logger = get_logger("execution.bulk")

# logger: intended for logs (such as errors) that are surfaced in all of the
# modes above.
logger = get_logger("execution")
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def log_progress(
    run_start_time: datetime,
    total_count: int,
    current_count: int,
    logger: logging.Logger = bulk_logger,
    formatter="Finished {count} / {total_count} lines.",
) -> None:
    """Log how many lines have finished together with simple timing estimates.

    Nothing is logged unless ``current_count`` is greater than zero. Otherwise two
    INFO records are emitted: the progress message built from ``formatter``, and the
    average time per finished line plus the projected time for the remaining lines.

    :param run_start_time: When the run started; compared against the current UTC time.
    :param total_count: Total number of lines in the run.
    :param current_count: Number of lines finished so far.
    :param logger: Logger that receives the records.
    :param formatter: ``str.format`` template accepting ``count`` and ``total_count``.
    """
    if not current_count > 0:
        return

    elapsed_seconds = datetime.now(timezone.utc).timestamp() - run_start_time.timestamp()
    seconds_per_line = round(elapsed_seconds / current_count, 2)
    # The projection deliberately uses the already-rounded average.
    seconds_remaining = round(seconds_per_line * (total_count - current_count), 2)

    logger.info(formatter.format(count=current_count, total_count=total_count))
    logger.info(
        f"Average execution time for completed lines: {seconds_per_line} seconds. "
        f"Estimated time for incomplete lines: {seconds_remaining} seconds."
    )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def incremental_print(log: str, printed: int, fileout: Union[TextIO, Any]) -> int:
    """Write the lines of ``log`` that have not been written yet.

    :param log: The full log text seen so far.
    :type log: str
    :param printed: Number of leading lines already written by earlier calls.
    :type printed: int
    :param fileout: Object with a ``write`` method that receives each new line followed by a newline.
    :type fileout: Union[TextIO, Any]
    :return: The updated count of written lines, to pass to the next call.
    :rtype: int
    """
    for index, text in enumerate(log.splitlines()):
        if index < printed:
            # Already emitted by a previous call.
            continue
        fileout.write(text + "\n")
        printed += 1
    return printed
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def print_red_error(message):
    """Print ``message`` in red when colorama can be imported, otherwise print it unchanged.

    :param message: The text to print.
    """
    try:
        from colorama import Fore, init
    except ImportError:
        # colorama is optional; fall back to uncoloured output.
        print(message)
    else:
        init(autoreset=True)
        print(Fore.RED + message)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@dataclass
class NodeInfo:
    """Identifies the node run whose output is currently being captured.

    NodeLogWriter embeds ``str()`` of this object in the lines it forwards to the loggers.
    """

    # Key used by NodeLogWriter to look up the per-run capture buffer.
    run_id: str
    # Name of the node being executed.
    node_name: str
    # Line number associated with the node run.
    line_number: int
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class NodeLogManager:
    """Context manager that swaps sys.stdout and sys.stderr for NodeLogWriter instances.

    While the manager is active and a node context has been set, text written to
    stdout/stderr is stored per run id. Without a node context the writers pass
    text through to the stream they wrapped. For example:

        with NodeLogManager() as log_manager:
            log_manager.set_node_context(run_id, node_name, line_number)
            print('test stdout')
            print('test stderr', file=sys.stderr)

        log_manager.get_logs(run_id) then returns a mapping with 'stdout' and 'stderr'
        entries holding the text captured for that run (each write is prefixed with a
        timestamp unless record_datetime is False).
    """

    def __init__(self, record_datetime: bool = True):
        # The writers wrap whatever the streams are at construction time.
        self.stdout_logger = NodeLogWriter(sys.stdout, record_datetime=record_datetime)
        self.stderr_logger = NodeLogWriter(sys.stderr, record_datetime=record_datetime, is_stderr=True)

    def __enter__(self) -> "NodeLogManager":
        """Install the writers as sys.stdout and sys.stderr, remembering the current streams."""
        self._prev_stdout, self._prev_stderr = sys.stdout, sys.stderr
        sys.stdout, sys.stderr = self.stdout_logger, self.stderr_logger
        return self

    def __exit__(self, *args) -> None:
        """Put back the streams that were active when the context was entered."""
        sys.stdout, sys.stderr = self._prev_stdout, self._prev_stderr

    def set_node_context(self, run_id: str, node_name: str, line_number: int) -> None:
        """Start capturing output for the given node run on both writers."""
        for writer in (self.stdout_logger, self.stderr_logger):
            writer.set_node_info(run_id, node_name, line_number)

    def clear_node_context(self, run_id: str) -> None:
        """Stop capturing for ``run_id`` and discard its buffers on both writers."""
        for writer in (self.stdout_logger, self.stderr_logger):
            writer.clear_node_info(run_id)

    def get_logs(self, run_id: str) -> Mapping[str, str]:
        """Return the captured text for ``run_id`` under the keys 'stdout' and 'stderr'."""
        stdout_text = self.stdout_logger.get_log(run_id)
        stderr_text = self.stderr_logger.get_log(run_id)
        return {"stdout": stdout_text, "stderr": stderr_text}
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class NodeLogWriter(TextIOBase):
    """Text stream that records node run output per run id.

    When a node context is set, writes are scrubbed of credentials, forwarded to the
    loggers, and appended to an in-memory buffer for that run. Without a node context,
    writes go to the stream this writer wraps.
    """

    DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"

    def __init__(self, prev_stdout: Union[TextIOBase, Any], record_datetime: bool = True, is_stderr: bool = False):
        # One capture buffer per run id.
        self.run_id_to_stdout: Dict[str, StringIO] = {}
        # Node currently writing in this context; None means "pass through".
        self._context: ContextVar[Optional[NodeInfo]] = ContextVar("run_log_info", default=None)
        # Stream to fall back to when no node context is set.
        self._prev_out: Union[TextIOBase, Any] = prev_stdout
        self._record_datetime: bool = record_datetime
        self._is_stderr: bool = is_stderr

    def set_node_info(self, run_id: str, node_name: str, line_number: int) -> None:
        """Record the active node in the context variable and start a fresh buffer for it.

        After this call, write() appends to the buffer associated with ``run_id``.
        """
        self._context.set(NodeInfo(run_id, node_name, line_number))
        self.run_id_to_stdout[run_id] = StringIO()

    def clear_node_info(self, run_id: str):
        """Reset the context variable if it refers to ``run_id`` and drop that run's buffer."""
        active = self._context.get()
        if active is not None and active.run_id == run_id:
            self._context.set(None)

        self.run_id_to_stdout.pop(run_id, None)

    def get_log(self, run_id: str) -> str:
        """Return the text captured for ``run_id``, or an empty string if there is no buffer."""
        buffer: Optional[StringIO] = self.run_id_to_stdout.get(run_id)
        return "" if buffer is None else buffer.getvalue()

    def write(self, s: str) -> int:
        """Override TextIO's write method and store the input string in the run's buffer.

        Credentials are scrubbed from the string before it is written anywhere.
        The string is also forwarded to the flow/bulk loggers.
        If node info is not set, the string is written to the wrapped stream instead.
        """
        active: Optional[NodeInfo] = self._context.get()
        s = scrub_credentials(s)  # Remove credential from string.
        if active is None:
            return self._prev_out.write(s)

        self._write_to_flow_log(active, s)
        buffer: Optional[StringIO] = self.run_id_to_stdout.get(active.run_id)
        # When the line execution timeout is reached, all running nodes will be cancelled and node info
        # will be cleared, which removes the buffer from self.run_id_to_stdout. Sync tools running in a
        # worker thread can't be stopped, and self._context won't change in that worker thread, so the
        # buffer may already be gone when they write.
        if buffer is None:
            return 0

        if self._record_datetime and s != "\n":  # A bare line break gets no datetime prefix.
            timestamp = datetime.now(timezone.utc).strftime(self.DATETIME_FORMAT)
            s = f"[{timestamp}] {s}"
        return buffer.write(s)

    def flush(self):
        """Override TextIO's flush method."""
        active: Optional[NodeInfo] = self._context.get()
        if active is None:
            self._prev_out.flush()
            return

        buffer = self.run_id_to_stdout.get(active.run_id)
        if buffer is not None:
            buffer.flush()

    def _write_to_flow_log(self, log_info: "NodeInfo", s: str):
        """Save stdout log to flow_logger and stderr log to logger."""
        # print('log message.') calls write twice, the second time with only '\n';
        # that trailing line break must not become its own log record.
        if s == "\n":
            return

        text = s.rstrip("\n")
        if self._is_stderr:
            # Log stderr in all scenarios so we can diagnose problems.
            logger.warning(f"[{str(log_info)}] stderr> " + text)
            return

        # Log stdout only in flow mode.
        flow_logger.info(f"[{str(log_info)}] stdout> " + text)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
import contextvars
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
7
|
+
from functools import partial
|
|
8
|
+
from typing_extensions import override
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ThreadPoolExecutorWithContext(ThreadPoolExecutor):
    """ThreadPoolExecutor whose tasks run inside a copy of the submitting code's context.

    Context variables set by the caller before ``submit`` are therefore visible
    to the task running on the worker thread.
    """

    @override
    def submit(self, fn, *args, **kwargs):
        """Schedule ``fn(*args, **kwargs)`` to run within a snapshot of the current context.

        :param fn: The callable to execute.
        :return: The future returned by ``ThreadPoolExecutor.submit``.
        """
        # Snapshot at submit time, so each task sees the caller's values as of this call.
        snapshot = contextvars.copy_context()
        return super().submit(snapshot.run, fn, *args, **kwargs)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from azure.ai.evaluation._legacy.prompty._prompty import AsyncPrompty
|
|
6
|
+
from azure.ai.evaluation._legacy.prompty._connection import Connection, OpenAIConnection, AzureOpenAIConnection
|
|
7
|
+
from azure.ai.evaluation._legacy.prompty._exceptions import (
|
|
8
|
+
PromptyException,
|
|
9
|
+
MissingRequiredInputError,
|
|
10
|
+
InvalidInputError,
|
|
11
|
+
JinjaTemplateError,
|
|
12
|
+
NotSupportedError,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# =========================================================================================================
|
|
16
|
+
# NOTE: All of the code here is largely a copy of code from Promptflow. Generally speaking, the following
|
|
17
|
+
# changes were made:
|
|
18
|
+
# - Added type annotations
|
|
19
|
+
# - Legacy or deprecated functionality has been removed (e.g. no more support for completions API)
|
|
20
|
+
# - Reworked the way images are handled to 1) Reduce the amount of code brought over, 2) remove
|
|
21
|
+
# the need to do two passes over the template to insert images, 3) remove the completely unnecessary
|
|
22
|
+
# loading of image data from the internet when it is not actually needed
|
|
23
|
+
# - Minor obvious tweaks to improve code readability, and removal of unused code paths
|
|
24
|
+
# =========================================================================================================
|
|
25
|
+
|
|
26
|
+
# Public names of this package: the classes and exceptions imported above from the
# _prompty, _connection and _exceptions submodules.
__all__ = [
    "AsyncPrompty",
    "Connection",
    "AzureOpenAIConnection",
    "OpenAIConnection",
    "PromptyException",
    "MissingRequiredInputError",
    "InvalidInputError",
    "JinjaTemplateError",
    "NotSupportedError",
]
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Any, ClassVar, Mapping, Optional
|
|
9
|
+
|
|
10
|
+
from azure.ai.evaluation._legacy.prompty._exceptions import MissingRequiredInputError
|
|
11
|
+
from azure.ai.evaluation._legacy.prompty._utils import dataclass_from_dict
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _is_empty_connection_config(connection_dict: Mapping[str, Any]) -> bool:
|
|
15
|
+
return any(key not in {"azure_deployment", "model", "type"} for key in connection_dict.keys())
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Connection(ABC):
    """Abstract base for the connection classes defined in this module."""

    @property
    @abstractmethod
    def type(self) -> str:
        """Gets the type of the connection.

        :return: The type of the connection.
        :rtype: str"""
        ...

    @staticmethod
    def parse_from_config(model_configuration: Mapping[str, Any]) -> "Connection":
        """Build a connection from a model configuration.

        The "type" entry selects the connection class. If the remaining entries contain
        nothing beyond the deployment/model placeholders, the connection is read from
        environment variables; otherwise it is built from the entries themselves.

        :param model_configuration: The model configuration.
        :type model_configuration: Mapping[str, Any]
        :return: The connection.
        :rtype: Connection
        :raises MissingRequiredInputError: If "type" is missing or not a supported value.
        """
        # Work on a copy so the caller's mapping is left untouched by the pop below.
        settings = {**model_configuration}
        kind = settings.pop("type", "")

        if kind in (AzureOpenAIConnection.TYPE, "azure_openai"):
            # The helper returns True when keys beyond the placeholders are present,
            # i.e. when the caller supplied explicit settings.
            if _is_empty_connection_config(settings):
                return dataclass_from_dict(AzureOpenAIConnection, settings)
            return AzureOpenAIConnection.from_env()

        if kind in (OpenAIConnection.TYPE, "openai"):
            if _is_empty_connection_config(settings):
                return dataclass_from_dict(OpenAIConnection, settings)
            return OpenAIConnection.from_env()

        raise MissingRequiredInputError(
            f"'{kind}' is not a supported connection type. Valid values are "
            f"[{AzureOpenAIConnection.TYPE}, {OpenAIConnection.TYPE}]"
        )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class OpenAIConnection(Connection):
    """Connection settings for an OpenAI endpoint."""

    base_url: str
    model: str
    api_key: Optional[str] = None
    organization: Optional[str] = None

    TYPE: ClassVar[str] = "openai"

    @property
    def type(self) -> str:
        """Gets the type of the connection.

        :return: The value of ``OpenAIConnection.TYPE``.
        :rtype: str
        """
        return OpenAIConnection.TYPE

    @classmethod
    def from_env(cls) -> "OpenAIConnection":
        """Create a connection from the OPENAI_* environment variables.

        ``base_url`` and ``model`` fall back to an empty string when their variables
        are unset; ``api_key`` and ``organization`` fall back to None.

        :return: The connection.
        :rtype: OpenAIConnection
        """
        env = os.environ
        return cls(
            base_url=env.get("OPENAI_BASE_URL", ""),
            model=env.get("OPENAI_MODEL", ""),
            api_key=env.get("OPENAI_API_KEY"),
            organization=env.get("OPENAI_ORG_ID"),
        )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class AzureOpenAIConnection(Connection):
    """Connection settings for an Azure OpenAI endpoint."""

    azure_endpoint: str
    azure_deployment: str
    api_key: Optional[str] = None
    api_version: Optional[str] = None
    resource_id: Optional[str] = None

    TYPE: ClassVar[str] = "azure_openai"

    @property
    def type(self) -> str:
        """Gets the type of the connection.

        :return: The value of ``AzureOpenAIConnection.TYPE``.
        :rtype: str
        """
        return AzureOpenAIConnection.TYPE

    @classmethod
    def from_env(cls) -> "AzureOpenAIConnection":
        """Create a connection from the AZURE_OPENAI_* environment variables.

        The endpoint and deployment fall back to an empty string when unset, the API key
        to None, and the API version to "2024-02-01". ``resource_id`` is not read from
        the environment.

        :return: The connection.
        :rtype: AzureOpenAIConnection
        """
        env = os.environ
        return cls(
            azure_endpoint=env.get("AZURE_OPENAI_ENDPOINT", ""),
            azure_deployment=env.get("AZURE_OPENAI_DEPLOYMENT", ""),
            api_key=env.get("AZURE_OPENAI_API_KEY"),
            api_version=env.get("AZURE_OPENAI_API_VERSION", "2024-02-01"),
        )

    def __post_init__(self):
        # Fall back to the default API version when none (or an empty one) was supplied.
        self.api_version = self.api_version or "2024-02-01"
|