azure-ai-evaluation 1.0.1__py3-none-any.whl → 1.13.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic. Click here for more details.
- azure/ai/evaluation/__init__.py +85 -14
- azure/ai/evaluation/_aoai/__init__.py +10 -0
- azure/ai/evaluation/_aoai/aoai_grader.py +140 -0
- azure/ai/evaluation/_aoai/label_grader.py +68 -0
- azure/ai/evaluation/_aoai/python_grader.py +86 -0
- azure/ai/evaluation/_aoai/score_model_grader.py +94 -0
- azure/ai/evaluation/_aoai/string_check_grader.py +66 -0
- azure/ai/evaluation/_aoai/text_similarity_grader.py +80 -0
- azure/ai/evaluation/_azure/__init__.py +3 -0
- azure/ai/evaluation/_azure/_clients.py +204 -0
- azure/ai/evaluation/_azure/_envs.py +207 -0
- azure/ai/evaluation/_azure/_models.py +227 -0
- azure/ai/evaluation/_azure/_token_manager.py +129 -0
- azure/ai/evaluation/_common/__init__.py +9 -1
- azure/ai/evaluation/_common/constants.py +124 -2
- azure/ai/evaluation/_common/evaluation_onedp_client.py +169 -0
- azure/ai/evaluation/_common/onedp/__init__.py +32 -0
- azure/ai/evaluation/_common/onedp/_client.py +166 -0
- azure/ai/evaluation/_common/onedp/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/_model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/_serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_types.py +21 -0
- azure/ai/evaluation/_common/onedp/_utils/__init__.py +6 -0
- azure/ai/evaluation/_common/onedp/_utils/model_base.py +1232 -0
- azure/ai/evaluation/_common/onedp/_utils/serialization.py +2032 -0
- azure/ai/evaluation/_common/onedp/_validation.py +66 -0
- azure/ai/evaluation/_common/onedp/_vendor.py +50 -0
- azure/ai/evaluation/_common/onedp/_version.py +9 -0
- azure/ai/evaluation/_common/onedp/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/onedp/aio/_client.py +168 -0
- azure/ai/evaluation/_common/onedp/aio/_configuration.py +72 -0
- azure/ai/evaluation/_common/onedp/aio/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/aio/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_operations.py +7143 -0
- azure/ai/evaluation/_common/onedp/aio/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/models/__init__.py +358 -0
- azure/ai/evaluation/_common/onedp/models/_enums.py +447 -0
- azure/ai/evaluation/_common/onedp/models/_models.py +5963 -0
- azure/ai/evaluation/_common/onedp/models/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/operations/__init__.py +49 -0
- azure/ai/evaluation/_common/onedp/operations/_operations.py +8951 -0
- azure/ai/evaluation/_common/onedp/operations/_patch.py +21 -0
- azure/ai/evaluation/_common/onedp/py.typed +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/__init__.py +1 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/__init__.py +22 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_operations.py +29 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/buildingblocks/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_operations.py +34 -0
- azure/ai/evaluation/_common/onedp/servicepatterns/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/rai_service.py +578 -69
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1238 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +505 -27
- azure/ai/evaluation/_constants.py +147 -0
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +899 -0
- azure/ai/evaluation/_converters/_models.py +467 -0
- azure/ai/evaluation/_converters/_sk_services.py +495 -0
- azure/ai/evaluation/_eval_mapping.py +87 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -2
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +176 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +19 -6
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +47 -22
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +18 -2
- azure/ai/evaluation/_evaluate/_eval_run.py +32 -46
- azure/ai/evaluation/_evaluate/_evaluate.py +1809 -142
- azure/ai/evaluation/_evaluate/_evaluate_aoai.py +992 -0
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -90
- azure/ai/evaluation/_evaluate/_utils.py +237 -42
- azure/ai/evaluation/_evaluator_definition.py +76 -0
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +80 -28
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +119 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +40 -4
- azure/ai/evaluation/_evaluators/_common/__init__.py +2 -0
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +430 -29
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +63 -0
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +269 -12
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +74 -9
- azure/ai/evaluation/_evaluators/_common/_conversation_aggregators.py +49 -0
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +73 -53
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +26 -5
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +35 -5
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +34 -4
- azure/ai/evaluation/_evaluators/_document_retrieval/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_document_retrieval/_document_retrieval.py +442 -0
- azure/ai/evaluation/_evaluators/_eci/_eci.py +6 -3
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +97 -70
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +39 -3
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +80 -25
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +230 -20
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_with_query.prompty +30 -29
- azure/ai/evaluation/_evaluators/_groundedness/groundedness_without_query.prompty +19 -14
- azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +196 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +275 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +89 -36
- azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +22 -4
- azure/ai/evaluation/_evaluators/_qa/_qa.py +94 -35
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +100 -4
- azure/ai/evaluation/_evaluators/_relevance/relevance.prompty +154 -56
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +202 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +84 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +39 -3
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +166 -26
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +38 -7
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +81 -85
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +226 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +101 -0
- azure/ai/evaluation/_evaluators/_task_completion/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py +177 -0
- azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty +220 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_navigation_efficiency/_task_navigation_efficiency.py +384 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +298 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +166 -0
- azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py +306 -0
- azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty +321 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py +263 -0
- azure/ai/evaluation/_evaluators/_tool_input_accuracy/tool_input_accuracy.prompty +76 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/_tool_output_utilization.py +225 -0
- azure/ai/evaluation/_evaluators/_tool_output_utilization/tool_output_utilization.prompty +221 -0
- azure/ai/evaluation/_evaluators/_tool_selection/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_selection/_tool_selection.py +266 -0
- azure/ai/evaluation/_evaluators/_tool_selection/tool_selection.prompty +104 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +102 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +20 -4
- azure/ai/evaluation/_exceptions.py +24 -1
- azure/ai/evaluation/_http_utils.py +7 -5
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +7 -0
- azure/ai/evaluation/_legacy/_adapters/_check.py +17 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +48 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +477 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +132 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +107 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +127 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +262 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +97 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/_common/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_common/_async_token_provider.py +117 -0
- azure/ai/evaluation/_legacy/_common/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_common/_thread_pool_executor_with_context.py +17 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +119 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +139 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +430 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +663 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_model_configurations.py +26 -0
- azure/ai/evaluation/_safety_evaluation/__init__.py +3 -0
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +917 -0
- azure/ai/evaluation/_user_agent.py +32 -1
- azure/ai/evaluation/_vendor/rouge_score/rouge_scorer.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/scoring.py +0 -4
- azure/ai/evaluation/_vendor/rouge_score/tokenize.py +0 -4
- azure/ai/evaluation/_version.py +2 -1
- azure/ai/evaluation/red_team/__init__.py +22 -0
- azure/ai/evaluation/red_team/_agent/__init__.py +3 -0
- azure/ai/evaluation/red_team/_agent/_agent_functions.py +261 -0
- azure/ai/evaluation/red_team/_agent/_agent_tools.py +461 -0
- azure/ai/evaluation/red_team/_agent/_agent_utils.py +89 -0
- azure/ai/evaluation/red_team/_agent/_semantic_kernel_plugin.py +228 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +268 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +49 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +115 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_evaluation_processor.py +505 -0
- azure/ai/evaluation/red_team/_mlflow_integration.py +430 -0
- azure/ai/evaluation/red_team/_orchestrator_manager.py +803 -0
- azure/ai/evaluation/red_team/_red_team.py +1717 -0
- azure/ai/evaluation/red_team/_red_team_result.py +661 -0
- azure/ai/evaluation/red_team/_result_processor.py +1708 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +37 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_eval_chat_target.py +128 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_target.py +601 -0
- azure/ai/evaluation/red_team/_utils/_rai_service_true_false_scorer.py +114 -0
- azure/ai/evaluation/red_team/_utils/constants.py +72 -0
- azure/ai/evaluation/red_team/_utils/exception_utils.py +345 -0
- azure/ai/evaluation/red_team/_utils/file_utils.py +266 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +365 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/metric_mapping.py +73 -0
- azure/ai/evaluation/red_team/_utils/objective_utils.py +46 -0
- azure/ai/evaluation/red_team/_utils/progress_utils.py +252 -0
- azure/ai/evaluation/red_team/_utils/retry_utils.py +218 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +218 -0
- azure/ai/evaluation/simulator/_adversarial_scenario.py +6 -0
- azure/ai/evaluation/simulator/_adversarial_simulator.py +187 -80
- azure/ai/evaluation/simulator/_constants.py +1 -0
- azure/ai/evaluation/simulator/_conversation/__init__.py +138 -11
- azure/ai/evaluation/simulator/_conversation/_conversation.py +6 -2
- azure/ai/evaluation/simulator/_conversation/constants.py +1 -1
- azure/ai/evaluation/simulator/_direct_attack_simulator.py +37 -24
- azure/ai/evaluation/simulator/_helpers/_language_suffix_mapping.py +1 -0
- azure/ai/evaluation/simulator/_indirect_attack_simulator.py +56 -28
- azure/ai/evaluation/simulator/_model_tools/__init__.py +2 -1
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +225 -0
- azure/ai/evaluation/simulator/_model_tools/_identity_manager.py +12 -10
- azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py +100 -45
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +101 -3
- azure/ai/evaluation/simulator/_model_tools/_template_handler.py +31 -11
- azure/ai/evaluation/simulator/_model_tools/models.py +20 -17
- azure/ai/evaluation/simulator/_simulator.py +43 -19
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/METADATA +378 -27
- azure_ai_evaluation-1.13.5.dist-info/RECORD +305 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/_evaluators/_multimodal/__init__.py +0 -20
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py +0 -132
- azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py +0 -55
- azure/ai/evaluation/_evaluators/_multimodal/_hate_unfairness.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_protected_material.py +0 -124
- azure/ai/evaluation/_evaluators/_multimodal/_self_harm.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_sexual.py +0 -100
- azure/ai/evaluation/_evaluators/_multimodal/_violence.py +0 -100
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.0.1.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info/licenses}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.0.1.dist-info → azure_ai_evaluation-1.13.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,663 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# cspell:ignore apng, retriable
|
|
6
|
+
|
|
7
|
+
import copy
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import json
|
|
11
|
+
import base64
|
|
12
|
+
from dataclasses import dataclass, is_dataclass, fields
|
|
13
|
+
from logging import Logger
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import (
|
|
16
|
+
Any,
|
|
17
|
+
AsyncGenerator,
|
|
18
|
+
Dict,
|
|
19
|
+
Final,
|
|
20
|
+
List,
|
|
21
|
+
Mapping,
|
|
22
|
+
MutableMapping,
|
|
23
|
+
Optional,
|
|
24
|
+
Sequence,
|
|
25
|
+
Set,
|
|
26
|
+
Tuple,
|
|
27
|
+
Type,
|
|
28
|
+
TypeVar,
|
|
29
|
+
Union,
|
|
30
|
+
cast,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
from jinja2 import Template
|
|
34
|
+
from openai import AsyncStream
|
|
35
|
+
from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionUserMessageParam
|
|
36
|
+
from openai import APIConnectionError, APIStatusError, APITimeoutError, OpenAIError
|
|
37
|
+
|
|
38
|
+
from azure.ai.evaluation._constants import DefaultOpenEncoding
|
|
39
|
+
from azure.ai.evaluation._legacy.prompty._exceptions import (
|
|
40
|
+
InvalidInputError,
|
|
41
|
+
JinjaTemplateError,
|
|
42
|
+
PromptyException,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
from azure.ai.evaluation._legacy.prompty._yaml_utils import load_yaml
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# region: Resolving references
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class PromptyModelConfiguration:
    """
    A dataclass that represents a model config of prompty.

    :param api: Type of the LLM request, default value is chat.
    :type api: str
    :param configuration: Prompty model connection configuration
    :type configuration: dict
    :param parameters: Params of the LLM request.
    :type parameters: dict
    :param response: Return the complete response or the first choice, default value is first.
    :type response: str
    """

    # Connection settings (endpoint, deployment/model name, credentials, ...).
    configuration: dict
    # Keyword arguments forwarded with the LLM request.
    parameters: Dict[str, Any]
    # "first" returns only the first choice; anything else returns the full response.
    response: str = "first"
    # Explicit model name; derived from the configuration when not provided.
    model: Optional[str] = None
    # _overflow: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Reject anything that is not a plain dict up front.
        if not isinstance(self.configuration, dict):
            raise PromptyException("The configuration of the model must be a dictionary.")

        # Fall back to the deployment (Azure) or model name from the connection config.
        if not self.model:
            deployment = self.configuration.get("azure_deployment", None)
            self.model = deployment or self.configuration.get("model", None)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
T = TypeVar("T")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def dataclass_from_dict(cls: Type[T], data: Dict[str, Any]) -> T:
    """Helper function to make creating dataclass instances from dictionaries easier.

    Unlike using cls(**data), this function will ignore any keys in the dictionary that
    are not fields in the dataclass. If the dataclass optionally contains an _overflow
    field, any extra key/value pairs will be placed in that field.

    This does no type checking and inspects only the key names.

    :param Type[T] cls: The dataclass type to create.
    :param Dict[str, Any] data: The dictionary to create the dataclass instance from.
    :return: The dataclass instance.
    :rtype: T
    """
    if not is_dataclass(cls):
        raise ValueError("This function only works with @dataclass Types")

    known_names: Set[str] = {f.name for f in fields(cls)}

    # Split the input into recognized constructor arguments and everything else.
    kwargs: Dict[str, Any] = {name: val for name, val in data.items() if name in known_names}
    extras: Dict[str, Any] = {name: val for name, val in data.items() if name not in known_names}

    # Dataclasses that declare an _overflow field collect the unrecognized entries.
    if "_overflow" in known_names:
        kwargs["_overflow"] = extras

    return cast(T, cls(**kwargs))
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def resolve_references(origin: Mapping[str, Any], base_path: Optional[Path] = None) -> Dict[str, Any]:
    """Resolve all reference in the object.

    :param Mapping[str, Any] origin: The object to resolve.
    :param Path|None base_path: The base path to resolve the file reference.
    :return: The resolved object.
    :rtype: Dict[str, Any]
    """

    def _walk(node: Any) -> Any:
        # Strings may be ${env:...} or ${file:...} references; containers are
        # traversed structurally, and every other value is returned unchanged.
        if isinstance(node, str):
            return _resolve_reference(node, base_path=base_path)
        if isinstance(node, list):
            return [_walk(element) for element in node]
        if isinstance(node, dict):
            return {name: _walk(child) for name, child in node.items()}
        return node

    return {name: _walk(child) for name, child in origin.items()}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _resolve_reference(reference: str, base_path: Optional[Path] = None) -> Union[str, dict]:
    """
    Resolve the reference, two types are supported, env, file.
    When the string format is ${env:ENV_NAME}, the environment variable value will be returned.
    When the string format is ${file:file_path}, return the loaded json object.

    :param str reference: The reference string.
    :param Path|None base_path: The base path to resolve the file reference.
    :return: The resolved reference.
    :rtype: str | dict
    """
    matched = re.match(r"\$\{(\w+):(.*)\}", reference)
    if not matched:
        # Not a reference at all; hand the string back untouched.
        return reference

    kind, value = matched.groups()

    if kind == "env":
        # An unset environment variable falls back to the raw reference string.
        return os.environ.get(value, reference)

    if kind == "file":
        path = Path(value)
        if base_path and not path.is_absolute():
            path = Path(base_path) / value

        if not path.exists():
            raise PromptyException(f"Cannot find the reference file {value}.")

        with open(path, "r", encoding=DefaultOpenEncoding.READ) as f:
            suffix = path.suffix.lower()
            if suffix == ".json":
                return json.load(f)
            if suffix in [".yml", ".yaml"]:
                return load_yaml(f)
            return f.read()

    # TODO ralphe: logging?
    # logger.warning(f"Unknown reference type {kind}, return original value {reference}.")
    return reference
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def update_dict_recursively(origin_dict: Mapping[str, Any], overwrite_dict: Mapping[str, Any]) -> Dict[str, Any]:
    """Merge *overwrite_dict* onto *origin_dict*, recursing into nested dicts.

    Values from *overwrite_dict* take precedence; dict values are merged key by
    key, and keys present only in *origin_dict* are carried over unchanged.

    :param Mapping[str, Any] origin_dict: The base mapping.
    :param Mapping[str, Any] overwrite_dict: The mapping whose entries win.
    :return: A new dict with the merged contents.
    :rtype: Dict[str, Any]
    """
    merged: Dict[str, Any] = {
        key: update_dict_recursively(origin_dict.get(key, {}), value) if isinstance(value, dict) else value
        for key, value in overwrite_dict.items()
    }
    # Carry over anything the overwrite mapping did not mention.
    for key, value in origin_dict.items():
        merged.setdefault(key, value)
    return merged
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
# endregion
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# region: Jinja template rendering
|
|
195
|
+
|
|
196
|
+
VALID_ROLES = ["system", "user", "assistant", "function"]
"""Valid roles for the OpenAI Chat API"""

PROMPTY_ROLE_SEPARATOR_PATTERN = re.compile(
    r"(?i)^\s*#?\s*(" + "|".join(VALID_ROLES) + r")\s*:\s*\n", flags=re.MULTILINE
)
"""Pattern to match the role separator in a prompty template"""

MARKDOWN_IMAGE_PATTERN = re.compile(r"(?P<match>!\[[^\]]*\]\(.*?(?=\"|\))\))", flags=re.MULTILINE)
"""Pattern to match markdown syntax for embedding images such as ``![alt text](image_link)``.
This uses a 'hack' where by naming the capture group, using re.split() will cause
the named capture group to appear in the list of split parts"""

IMAGE_URL_PARSING_PATTERN = re.compile(
    r"^!\[(?P<alt_text>[^\]]+)\]\((?P<link>(?P<scheme>[^:]+(?=:))?:?(?P<mime_type>[^;]+(?=;))?;?(?P<data>[^\)]*))\)$"
)
"""Pattern used to parse the image URL from the markdown syntax. This captures the following groups:
- alt_text: The alt text for the image
- link: The full link
- scheme: The scheme used in the link (e.g. data, http, https)
- mime_type: The mime type of the image (only for data URLs)
- data: The data part of the URL (only for data URLs)
"""

DEFAULT_IMAGE_MIME_TYPE: Final[str] = "image/*"
"""The mime type to use when we don't know the image type"""

FILE_EXT_TO_MIME: Final[Mapping[str, str]] = {
    ".apng": "image/apng",
    ".avif": "image/avif",
    ".bmp": "image/bmp",
    ".gif": "image/gif",
    ".heic": "image/heic",
    ".heif": "image/heif",
    ".ico": "image/vnd.microsoft.icon",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".svg": "image/svg+xml",
    ".tif": "image/tiff",
    ".tiff": "image/tiff",
    ".webp": "image/webp",
}
"""Mapping of file extensions to mime types"""
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def render_jinja_template(template_str: str, *, trim_blocks=True, keep_trailing_newline=True, **kwargs) -> str:
    """Render a Jinja2 template string with the supplied keyword arguments.

    :param str template_str: The template text to render.
    :keyword bool trim_blocks: Whether to remove the first newline after a block tag.
    :keyword bool keep_trailing_newline: Whether to preserve the template's trailing newline.
    :return: The rendered text.
    :rtype: str
    :raises PromptyException: If compiling or rendering the template fails.
    """
    try:
        compiled = Template(template_str, trim_blocks=trim_blocks, keep_trailing_newline=keep_trailing_newline)
        return compiled.render(**kwargs)
    except Exception as e:  # pylint: disable=broad-except
        raise PromptyException(f"Failed to render jinja template - {type(e).__name__}: {str(e)}") from e
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def build_messages(
    *, prompt: str, working_dir: Path, image_detail: str = "auto", **kwargs: Any
) -> Sequence[Mapping[str, Any]]:
    """Render the prompty template and split it into OpenAI chat messages.

    :keyword str prompt: The Jinja2 prompt template to render.
    :keyword Path working_dir: Directory used when resolving relative image paths.
    :keyword str image_detail: Detail level forwarded for inlined images.
    :return: The parsed chat messages.
    :rtype: Sequence[Mapping[str, Any]]
    """
    # keep_trailing_newline=True keeps the last \n in the prompt so that
    # "user:\t\n" is not collapsed to "user:".
    rendered = render_jinja_template(prompt, trim_blocks=True, keep_trailing_newline=True, **kwargs)
    return _parse_chat(rendered, working_dir, image_detail)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _parse_chat(chat_str: str, working_dir: Path, image_detail: str) -> Sequence[Mapping[str, Any]]:
    """Split a rendered prompty template into a list of chat messages.

    The text is split on role separator lines (e.g. ``system:``, ``user:``);
    each separator opens a new message and the following chunk becomes that
    message's content (and, for function messages, its name).

    :param str chat_str: The rendered template text to parse.
    :param Path working_dir: Directory used to resolve relative image paths.
    :param str image_detail: Image detail level forwarded to inlined images.
    :return: The parsed chat messages.
    :rtype: Sequence[Mapping[str, Any]]
    """
    # openai chat api only supports VALID_ROLES as role names.
    # customer can add single # in front of role name for markdown highlight.
    # and we still support role name without # prefix for backward compatibility.

    # Because PROMPTY_ROLE_SEPARATOR_PATTERN uses a capture group, re.split
    # alternates role-name chunks and content chunks in the result list.
    chunks = re.split(PROMPTY_ROLE_SEPARATOR_PATTERN, chat_str)
    chat_list: List[Dict[str, Any]] = []

    for chunk in chunks:
        last_message = chat_list[-1] if len(chat_list) > 0 else None

        # =======================================================================================================
        # NOTE: The Promptflow code supported tool calls but used eval() to parse them. This is an unacceptable
        # security risk. Since none of the current evaluators use tool calls, this functionality has been
        # removed.
        # =======================================================================================================

        # if is_tool_chunk(last_message):
        #     parse_tools(last_message, chunk, hash2images, image_detail)
        #     continue
        # if last_message and "role" in last_message and last_message["role"] == "assistant":
        #     parsed_result = _try_parse_tool_calls(chunk)
        #     if parsed_result is not None:
        #         last_message["tool_calls"] = parsed_result
        #         continue

        # A message with a role but no content yet means this chunk is the body
        # for the message opened by the previous role separator.
        if (
            last_message
            and "role" in last_message  # pylint: disable=unsupported-membership-test
            and "content" not in last_message  # pylint: disable=unsupported-membership-test
            and "tool_calls" not in last_message  # pylint: disable=unsupported-membership-test
        ):
            # _try_parse_name_and_content is defined elsewhere in this module;
            # presumably it extracts an optional (name, content) pair — confirm there.
            parsed_result = _try_parse_name_and_content(chunk)
            if parsed_result is None:
                if last_message["role"] == "function":  # pylint: disable=unsubscriptable-object
                    # "name" is required if the role is "function"
                    raise JinjaTemplateError(
                        "Failed to parse function role prompt. Please make sure the prompt follows the "
                        "format: 'name:\\nfunction_name\\ncontent:\\nfunction_content'. "
                        "'name' is required if role is function, and it should be the name of the function "
                        "whose response is in the content. May contain a-z, A-Z, 0-9, and underscores, "
                        "with a maximum length of 64 characters. See more details in "
                        "https://platform.openai.com/docs/api-reference/chat/create#chat/create-name"
                    )

                # "name" is optional for other role types.
                last_message["content"] = _to_content_str_or_list(  # pylint: disable=unsupported-assignment-operation
                    chunk, working_dir, image_detail
                )
            else:
                last_message["name"] = parsed_result[0]  # pylint: disable=unsupported-assignment-operation
                last_message["content"] = _to_content_str_or_list(  # pylint: disable=unsupported-assignment-operation
                    parsed_result[1], working_dir, image_detail
                )
        else:
            # Whitespace between messages (or leading text before the first role)
            # is skipped entirely.
            if chunk.strip() == "":
                continue
            # Check if prompt follows chat api message format and has valid role.
            # References: https://platform.openai.com/docs/api-reference/chat/create.
            role = chunk.strip().lower()
            _validate_role(role)
            new_message = {"role": role}
            chat_list.append(new_message)
    return chat_list
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _validate_role(role: str):
    """Ensure a parsed role delimiter is one of the roles the Chat API accepts.

    :param str role: The lower-cased role token parsed from the prompt.
    :raises JinjaTemplateError: If the role is not in ``VALID_ROLES``.
    """
    if role in VALID_ROLES:
        return
    allowed = ", ".join(VALID_ROLES)
    raise JinjaTemplateError(
        message=(
            f"The Chat API requires a specific format for prompt definition, and the prompt should include separate "
            f"lines as role delimiters: {allowed}.\n"
            f"Current parsed role '{role}' does not meet the requirement. If you intend to use the Completion API, "
            f"please select the appropriate API type and deployment name."
        )
    )
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _to_content_str_or_list(chat_str: str, working_dir: Path, image_detail: str) -> Union[str, List[Dict[str, Any]]]:
    """Convert a chat message body into either plain text or a list of content parts.

    The text is split on markdown image references. When no image is present the
    stripped text is returned as-is; otherwise a list of ``text``/``image_url``
    content parts is built, inlining each image via :func:`_inline_image`.

    :param str chat_str: The raw message text, possibly containing markdown images.
    :param Path working_dir: Directory used to resolve relative image file paths.
    :param str image_detail: The image detail level to request from the AI service.
    :return: The stripped text, or a list of content-part dicts when images are present.
    :rtype: Union[str, List[Dict[str, Any]]]
    """
    pieces = [segment.strip() for segment in re.split(MARKDOWN_IMAGE_PATTERN, chat_str)]
    pieces = [segment for segment in pieces if segment]
    if len(pieces) <= 1:
        # No embedded images — keep the simple string form.
        return chat_str.strip()

    content_parts: List[Dict[str, Any]] = []
    for segment in pieces:
        is_image = segment.startswith("![") and segment.endswith(")")
        if is_image:
            content_parts.append(_inline_image(segment, working_dir, image_detail))
        else:
            content_parts.append({"type": "text", "text": segment})
    return content_parts
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def _inline_image(image: str, working_dir: Path, image_detail: str) -> Dict[str, Any]:
    """This accepts an image URL in markdown format, and parses that into a message containing the image details
    to be sent to AI service. In the case of local file images, they will be loaded and their contents encoded
    into a base 64 data URI. Internet URLs will remain untouched. It can accept http(s), ftp(s), as well
    as data URIs.

    :param str image: The image URL in markdown format (e.g. ``![alt text](image_link)``)
    :param Path working_dir: The working directory to use when resolving relative file paths
    :param str image_detail: The image detail to use when sending the image to the AI service
    :return: The image message to send to the AI service
    :rtype: Mapping[str, Any]"""

    def local_to_base64(local_file: str, mime_type: Optional[str]) -> str:
        # Helper: read a local image file and wrap it in a base64 data URI.
        # Relative paths are resolved against the prompty's working directory.
        path = Path(local_file)
        if not path.is_absolute():
            path = working_dir / local_file
        if not path.exists():
            # TODO ralphe logging?
            # logger.warning(f"Cannot find the image path {image_content},
            #                it will be regarded as {type(image_str)}.")
            raise InvalidInputError(f"Cannot find the image path '{path.as_posix()}'")

        base64_encoded = base64.b64encode(path.read_bytes()).decode("utf-8")
        if not mime_type:
            # No explicit MIME type given — infer one from the file extension,
            # falling back to the default image MIME type.
            mime_type = FILE_EXT_TO_MIME.get(path.suffix.lower(), DEFAULT_IMAGE_MIME_TYPE)
        return f"data:{mime_type};base64,{base64_encoded}"

    match = re.match(IMAGE_URL_PARSING_PATTERN, image)
    if not match:
        raise InvalidInputError(f"Invalid image URL '{image}'")

    inlined_uri: str
    mime_type: Optional[str] = None

    # NOTE(review): assumes IMAGE_URL_PARSING_PATTERN defines the named groups
    # "scheme", "link", "mime_type" and "data" — confirm against its definition.
    scheme: str = (match.group("scheme") or "").strip().lower()
    if scheme in ["http", "https", "ftp", "ftps"]:
        # nothing special to do here, pass through full URI as is
        inlined_uri = (match.group("link") or "").strip()
    elif scheme == "data":
        mime_type = (match.group("mime_type") or "").strip()
        data: str = (match.group("data") or "").strip()

        # data urls may contain local paths too
        if data[:5].lower() == "path:":
            # "data:<mime>;path:<file>" — load the referenced file and inline it.
            inlined_uri = local_to_base64(data[5:].strip(), mime_type)
        elif data[:6].lower() == "base64":
            # nothing special to do here, pass through full URI as is
            inlined_uri = (match.group("link") or "").strip()
        else:
            raise InvalidInputError(f"Invalid image data URL '{image}'")
    else:
        # assume it's a file path
        inlined_uri = local_to_base64((match.group("link") or "").strip(), mime_type)

    if not inlined_uri:
        raise InvalidInputError(f"Failed to determine how to inline the following image URL '{image}'")

    # Shape matches the OpenAI chat "image_url" content part.
    return {
        "type": "image_url",
        "image_url": {
            "url": inlined_uri,
            "detail": image_detail,
        },
    }
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def _try_parse_name_and_content(role_prompt: str) -> Optional[Tuple[str, str]]:
|
|
418
|
+
# customer can add ## in front of name/content for markdown highlight.
|
|
419
|
+
# and we still support name/content without ## prefix for backward compatibility.
|
|
420
|
+
# TODO ralphe: This maybe has something to do with parsing functions or tool calls but I'm not sure
|
|
421
|
+
pattern = r"\n*#{0,2}\s*name\s*:\s*\n+\s*(\S+)\s*\n*#{0,2}\s*content\s*:\s*\n?(.*)"
|
|
422
|
+
match = re.search(pattern, role_prompt, re.DOTALL)
|
|
423
|
+
if match:
|
|
424
|
+
return match.group(1), match.group(2)
|
|
425
|
+
return None
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
# endregion
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
# region OpenAI connections and requests
|
|
432
|
+
|
|
433
|
+
# The two shapes an OpenAI chat call can return: a complete ChatCompletion
# object, or an async stream of ChatCompletionChunk deltas when streaming.
OpenAIChatResponseType = Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def prepare_open_ai_request_params(
    model_config: PromptyModelConfiguration, template: Union[str, Sequence[Mapping[str, Any]]]
) -> MutableMapping[str, Any]:
    """Assemble the keyword arguments for an OpenAI chat-completions request.

    The configured model parameters are deep-copied so that mutating the
    returned mapping never leaks back into ``model_config``.

    :param model_config: The prompty model configuration holding the model name and parameters.
    :type model_config: PromptyModelConfiguration
    :param template: The rendered prompt — raw text or a sequence of chat messages.
    :type template: Union[str, Sequence[Mapping[str, Any]]]
    :return: The request parameters to pass to the OpenAI client.
    :rtype: MutableMapping[str, Any]
    """
    request_params = copy.deepcopy(model_config.parameters)
    # if isinstance(connection, AzureOpenAIConnection):
    #     params.setdefault("extra_headers", {}).update({"ms-azure-ai-promptflow-called-from": "promptflow-core"})
    request_params["model"] = model_config.model
    request_params["messages"] = template

    # NOTE:
    # - Tool calls have been disabled due to a security issue in the implementation. See comment earlier in
    #   this file for more details
    # - Removing the validation of function calls in favour of letting the service do that validation. This
    #   removes a maintenance burden from the SDK should the service definition for function calls change.

    return request_params
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
async def format_llm_response(
    response: OpenAIChatResponseType,
    is_first_choice: bool,
    response_format: Optional[Mapping[str, Any]] = None,
    outputs: Optional[Mapping[str, Any]] = None,
    inputs: Optional[Mapping[str, Any]] = None,
) -> dict:
    """
    Format LLM response

    If is_first_choice is false, it will directly return LLM response.
    If is_first_choice is true, behavior is as below:
        response_format: type: text
            - n: None/1/2
                Return the first choice content. Return type is string.
            - stream: True
                Return generator list of first choice content. Return type is generator[str]
        response_format: type: json_object
            - n : None/1/2
                Return json dict of the first choice. Return type is dict
            - stream: True
                Return json dict of the first choice. Return type is dict
            - outputs
                Extract corresponding output in the json dict to the first choice. Return type is dict.

    :param response: LLM response.
    :type response:
    :param is_first_choice: If true, it will return the first item in response choices, else it will return all response
    :type is_first_choice: bool
    :param response_format: An object specifying the format that the model must output.
    :type response_format: str
    :param outputs: Extract corresponding output in json format response
    :type outputs: dict
    :param inputs: The request inputs; echoed back in the returned "sample_input" field.
    :type inputs: dict
    :return: Formatted LLM response.
    :rtype: Union[str, dict, Response]
    """

    def format_choice(item: str) -> Union[str, Mapping[str, Any]]:
        # response_format is one of text or json_object.
        # https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format
        if not is_json_format:
            return item

        result_dict = json.loads(item)
        if not outputs:
            return result_dict

        # return the keys in outputs
        output_results = {}
        for key in outputs:
            if key not in result_dict:
                raise InvalidInputError(f"Cannot find '{key}' in response {list(result_dict.keys())}")
            output_results[key] = result_dict[key]
        return output_results

    async def format_stream(llm_response: AsyncStream[ChatCompletionChunk]) -> AsyncGenerator[str, None]:
        # Yield only deltas belonging to the first choice index seen; stop as
        # soon as a chunk for a different choice index arrives.
        cur_index = None
        async for chunk in llm_response:
            if len(chunk.choices) > 0 and chunk.choices[0].delta.content:
                if cur_index is None:
                    cur_index = chunk.choices[0].index
                if cur_index != chunk.choices[0].index:
                    return
                yield chunk.choices[0].delta.content

    to_ret = {
        "llm_output": None,
        "input_token_count": 0,
        "output_token_count": 0,
        "total_token_count": 0,
        "finish_reason": "",
        "model_id": "",
        "sample_input": "",
        "sample_output": "",
    }

    if not is_first_choice:
        to_ret["llm_output"] = response
        return to_ret  # we don't actually use this code path since streaming is not used, so set token counts to 0

    is_json_format = isinstance(response_format, dict) and response_format.get("type") == "json_object"
    if isinstance(response, AsyncStream):
        if not is_json_format:
            to_ret["llm_output"] = format_stream(llm_response=response)
            return to_ret
        content = "".join([item async for item in format_stream(llm_response=response)])
        to_ret["llm_output"] = format_choice(content)
        return to_ret  # we don't actually use this code path since streaming is not used, so set token counts to 0
    else:
        input_token_count = response.usage.prompt_tokens if response.usage and response.usage.prompt_tokens else 0
        output_token_count = (
            response.usage.completion_tokens if response.usage and response.usage.completion_tokens else 0
        )
        total_token_count = response.usage.total_tokens if response.usage and response.usage.total_tokens else 0
        finish_reason = (
            response.choices[0].finish_reason if response.choices and response.choices[0].finish_reason else ""
        )
        model_id = response.model if response.model else ""
        sample_output_list = (
            [{"role": response.choices[0].message.role, "content": response.choices[0].message.content}]
            if (response.choices and response.choices[0].message.content and response.choices[0].message.role)
            else []
        )
        sample_output = json.dumps(sample_output_list)
        # BUGFIX: sample_input must be initialized unconditionally. It was previously
        # only assigned inside the `if inputs` branch below, which raised
        # UnboundLocalError at the final assignment whenever `inputs` was empty/None.
        sample_input = ""
        input_str = f"{json.dumps(inputs)}" if inputs else ""
        if inputs and len(inputs) > 0:
            sample_input_json = []
            msg = ChatCompletionUserMessageParam(
                role="user",
                content=input_str,
            )
            sample_input_json.append(msg)
            sample_input = json.dumps(sample_input_json)

        # When calling function/tool, function_call/tool_call response will be returned as a field in message,
        # so we need return message directly. Otherwise, we only return content.
        # https://platform.openai.com/docs/api-reference/chat/object#chat/object-choices
        # Reuse the guarded `finish_reason` computed above so an empty `choices` list
        # cannot raise here. NOTE(review): the service emits "function_call", not
        # "function_calls" — original value kept for compatibility; confirm intent.
        if finish_reason in ["tool_calls", "function_calls"]:
            response_content = response.model_dump()["choices"][0]["message"]
        else:
            response_content = getattr(response.choices[0].message, "content", "")
        result = format_choice(response_content)
        to_ret["llm_output"] = result
        to_ret["input_token_count"] = input_token_count
        to_ret["output_token_count"] = output_token_count
        to_ret["total_token_count"] = total_token_count
        to_ret["finish_reason"] = finish_reason
        to_ret["model_id"] = model_id
        to_ret["sample_input"] = sample_input
        to_ret["sample_output"] = sample_output
        return to_ret
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def openai_error_retryable(
    error: OpenAIError, retry: int, entity_retry: List[int], max_entity_retries: int
) -> Tuple[bool, float]:
    """
    Determines if an OpenAI error is retryable, and optionally determines the min retry delay to use.
    If none is returned, the caller will determine the delay to use.

    :param OpenAIError error: The error to handle
    :param int retry: The current retry count (0 means we're on the first attempt and no retries have been made)
    :param List[int] entity_retry: The current retry count for the unprocessable entity failures. This should be a
        list containing only 1 element to mimic pass by reference semantics. A value of 0 means we're on the
        first attempt and no retries have been made.
    :param int max_entity_retries: The maximum number of retries to make for unprocessable entity failures
    :return: A tuple containing whether the error is retryable and the min delay to use if any
    :rtype: Tuple[bool, Optional[float]]
    """

    # Using https://platform.openai.com/docs/guides/error-codes/api-errors#python-library-error-types as a reference

    should_retry: bool
    delay: Optional[float] = None

    if isinstance(error, APIConnectionError):
        retriable_error_messages: Sequence[str] = [
            "connection aborted",
            # issue 2296
            "server disconnected without sending a response",
        ]
        should_retry = (
            isinstance(error, APITimeoutError)  # APITimeoutError is a subclass of APIConnectionError
            or str(error).lower() in retriable_error_messages
            or str(error.__cause__).lower() in retriable_error_messages
        )
    elif isinstance(error, APIStatusError):
        status_code: int = error.response.status_code
        if status_code == 422:
            # As per the original legacy code, UnprocessableEntityError (HTTP 422) should be handled differently
            # with a smaller retry count, as retrying more may not be beneficial.
            should_retry = entity_retry[0] < max_entity_retries
            entity_retry[0] += 1
        elif status_code == 429:
            # Two types, one is you are throttled and so should retry after a delay, the other is you have exceeded
            # your quota and should not retry.
            # BUGFIX: a second, case-sensitive `should_retry = error.type != "insufficient_quota"`
            # previously overwrote the case-insensitive check, making it dead code and missing
            # e.g. "Insufficient_Quota". Keep the single case-insensitive, None-safe comparison.
            should_retry = (error.type or "").lower() != "insufficient_quota"
        else:
            # Any other 5xx is worth retrying; remaining 4xx errors are terminal.
            should_retry = status_code >= 500

        # Use what the service tells us to use for the delay if it's provided
        if should_retry and not delay:
            delay_str = error.response.headers.get("Retry-After", None)
            if delay_str is not None:
                delay = float(delay_str)
    else:
        # Unknown OpenAIError subclass: don't retry.
        should_retry = False

    # Use exponential backoff for retries if the service doesn't provide a delay
    if not delay:
        delay = min(60, 2 + 2**retry)

    return (should_retry, delay)
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
# endregion
|