azure-ai-evaluation 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of azure-ai-evaluation might be problematic; consult the registry's advisory page for details.
- azure/ai/evaluation/__init__.py +27 -1
- azure/ai/evaluation/_azure/_models.py +6 -6
- azure/ai/evaluation/_common/constants.py +6 -2
- azure/ai/evaluation/_common/rai_service.py +39 -5
- azure/ai/evaluation/_common/raiclient/__init__.py +34 -0
- azure/ai/evaluation/_common/raiclient/_client.py +128 -0
- azure/ai/evaluation/_common/raiclient/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/_model_base.py +1235 -0
- azure/ai/evaluation/_common/raiclient/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/_serialization.py +2050 -0
- azure/ai/evaluation/_common/raiclient/_version.py +9 -0
- azure/ai/evaluation/_common/raiclient/aio/__init__.py +29 -0
- azure/ai/evaluation/_common/raiclient/aio/_client.py +130 -0
- azure/ai/evaluation/_common/raiclient/aio/_configuration.py +87 -0
- azure/ai/evaluation/_common/raiclient/aio/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_operations.py +981 -0
- azure/ai/evaluation/_common/raiclient/aio/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/models/__init__.py +60 -0
- azure/ai/evaluation/_common/raiclient/models/_enums.py +18 -0
- azure/ai/evaluation/_common/raiclient/models/_models.py +651 -0
- azure/ai/evaluation/_common/raiclient/models/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/operations/__init__.py +25 -0
- azure/ai/evaluation/_common/raiclient/operations/_operations.py +1225 -0
- azure/ai/evaluation/_common/raiclient/operations/_patch.py +20 -0
- azure/ai/evaluation/_common/raiclient/py.typed +1 -0
- azure/ai/evaluation/_common/utils.py +23 -3
- azure/ai/evaluation/_constants.py +7 -0
- azure/ai/evaluation/_converters/__init__.py +3 -0
- azure/ai/evaluation/_converters/_ai_services.py +804 -0
- azure/ai/evaluation/_converters/_models.py +302 -0
- azure/ai/evaluation/_evaluate/_batch_run/__init__.py +10 -3
- azure/ai/evaluation/_evaluate/_batch_run/_run_submitter_client.py +104 -0
- azure/ai/evaluation/_evaluate/_batch_run/batch_clients.py +82 -0
- azure/ai/evaluation/_evaluate/_batch_run/code_client.py +18 -12
- azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +9 -4
- azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py +42 -22
- azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +1 -1
- azure/ai/evaluation/_evaluate/_eval_run.py +2 -2
- azure/ai/evaluation/_evaluate/_evaluate.py +109 -64
- azure/ai/evaluation/_evaluate/_telemetry/__init__.py +5 -89
- azure/ai/evaluation/_evaluate/_utils.py +3 -3
- azure/ai/evaluation/_evaluators/_bleu/_bleu.py +23 -3
- azure/ai/evaluation/_evaluators/_code_vulnerability/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_code_vulnerability/_code_vulnerability.py +120 -0
- azure/ai/evaluation/_evaluators/_coherence/_coherence.py +21 -2
- azure/ai/evaluation/_evaluators/_common/_base_eval.py +44 -4
- azure/ai/evaluation/_evaluators/_common/_base_multi_eval.py +4 -2
- azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py +44 -5
- azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +16 -4
- azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +42 -5
- azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +15 -0
- azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +15 -0
- azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +15 -0
- azure/ai/evaluation/_evaluators/_content_safety/_violence.py +15 -0
- azure/ai/evaluation/_evaluators/_f1_score/_f1_score.py +28 -4
- azure/ai/evaluation/_evaluators/_fluency/_fluency.py +21 -2
- azure/ai/evaluation/_evaluators/_gleu/_gleu.py +26 -3
- azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py +22 -4
- azure/ai/evaluation/_evaluators/_intent_resolution/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/_intent_resolution.py +152 -0
- azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty +161 -0
- azure/ai/evaluation/_evaluators/_meteor/_meteor.py +26 -3
- azure/ai/evaluation/_evaluators/_qa/_qa.py +51 -7
- azure/ai/evaluation/_evaluators/_relevance/_relevance.py +26 -2
- azure/ai/evaluation/_evaluators/_response_completeness/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_response_completeness/_response_completeness.py +158 -0
- azure/ai/evaluation/_evaluators/_response_completeness/response_completeness.prompty +99 -0
- azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +21 -2
- azure/ai/evaluation/_evaluators/_rouge/_rouge.py +113 -4
- azure/ai/evaluation/_evaluators/_service_groundedness/_service_groundedness.py +23 -3
- azure/ai/evaluation/_evaluators/_similarity/_similarity.py +24 -5
- azure/ai/evaluation/_evaluators/_task_adherence/__init__.py +7 -0
- azure/ai/evaluation/_evaluators/_task_adherence/_task_adherence.py +148 -0
- azure/ai/evaluation/_evaluators/_task_adherence/task_adherence.prompty +117 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/__init__.py +9 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py +292 -0
- azure/ai/evaluation/_evaluators/_tool_call_accuracy/tool_call_accuracy.prompty +71 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/__init__.py +5 -0
- azure/ai/evaluation/_evaluators/_ungrounded_attributes/_ungrounded_attributes.py +103 -0
- azure/ai/evaluation/_evaluators/_xpia/xpia.py +2 -0
- azure/ai/evaluation/_exceptions.py +5 -0
- azure/ai/evaluation/_legacy/__init__.py +3 -0
- azure/ai/evaluation/_legacy/_adapters/__init__.py +21 -0
- azure/ai/evaluation/_legacy/_adapters/_configuration.py +45 -0
- azure/ai/evaluation/_legacy/_adapters/_constants.py +10 -0
- azure/ai/evaluation/_legacy/_adapters/_errors.py +29 -0
- azure/ai/evaluation/_legacy/_adapters/_flows.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/_service.py +16 -0
- azure/ai/evaluation/_legacy/_adapters/client.py +51 -0
- azure/ai/evaluation/_legacy/_adapters/entities.py +26 -0
- azure/ai/evaluation/_legacy/_adapters/tracing.py +28 -0
- azure/ai/evaluation/_legacy/_adapters/types.py +15 -0
- azure/ai/evaluation/_legacy/_adapters/utils.py +31 -0
- azure/ai/evaluation/_legacy/_batch_engine/__init__.py +9 -0
- azure/ai/evaluation/_legacy/_batch_engine/_config.py +45 -0
- azure/ai/evaluation/_legacy/_batch_engine/_engine.py +368 -0
- azure/ai/evaluation/_legacy/_batch_engine/_exceptions.py +88 -0
- azure/ai/evaluation/_legacy/_batch_engine/_logging.py +292 -0
- azure/ai/evaluation/_legacy/_batch_engine/_openai_injector.py +23 -0
- azure/ai/evaluation/_legacy/_batch_engine/_result.py +99 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run.py +121 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_storage.py +128 -0
- azure/ai/evaluation/_legacy/_batch_engine/_run_submitter.py +217 -0
- azure/ai/evaluation/_legacy/_batch_engine/_status.py +25 -0
- azure/ai/evaluation/_legacy/_batch_engine/_trace.py +105 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils.py +82 -0
- azure/ai/evaluation/_legacy/_batch_engine/_utils_deprecated.py +131 -0
- azure/ai/evaluation/_legacy/prompty/__init__.py +36 -0
- azure/ai/evaluation/_legacy/prompty/_connection.py +182 -0
- azure/ai/evaluation/_legacy/prompty/_exceptions.py +59 -0
- azure/ai/evaluation/_legacy/prompty/_prompty.py +313 -0
- azure/ai/evaluation/_legacy/prompty/_utils.py +545 -0
- azure/ai/evaluation/_legacy/prompty/_yaml_utils.py +99 -0
- azure/ai/evaluation/_safety_evaluation/__init__.py +1 -1
- azure/ai/evaluation/_safety_evaluation/_generated_rai_client.py +0 -0
- azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py +251 -150
- azure/ai/evaluation/_version.py +1 -1
- azure/ai/evaluation/red_team/__init__.py +19 -0
- azure/ai/evaluation/red_team/_attack_objective_generator.py +195 -0
- azure/ai/evaluation/red_team/_attack_strategy.py +45 -0
- azure/ai/evaluation/red_team/_callback_chat_target.py +74 -0
- azure/ai/evaluation/red_team/_default_converter.py +21 -0
- azure/ai/evaluation/red_team/_red_team.py +1887 -0
- azure/ai/evaluation/red_team/_red_team_result.py +382 -0
- azure/ai/evaluation/red_team/_utils/__init__.py +3 -0
- azure/ai/evaluation/red_team/_utils/constants.py +65 -0
- azure/ai/evaluation/red_team/_utils/formatting_utils.py +165 -0
- azure/ai/evaluation/red_team/_utils/logging_utils.py +139 -0
- azure/ai/evaluation/red_team/_utils/strategy_utils.py +192 -0
- azure/ai/evaluation/simulator/_adversarial_scenario.py +3 -1
- azure/ai/evaluation/simulator/_adversarial_simulator.py +54 -27
- azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py +145 -0
- azure/ai/evaluation/simulator/_model_tools/_rai_client.py +71 -1
- azure/ai/evaluation/simulator/_simulator.py +1 -1
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/METADATA +80 -15
- azure_ai_evaluation-1.5.0.dist-info/RECORD +207 -0
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/WHEEL +1 -1
- azure/ai/evaluation/simulator/_tracing.py +0 -89
- azure_ai_evaluation-1.3.0.dist-info/RECORD +0 -119
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/NOTICE.txt +0 -0
- {azure_ai_evaluation-1.3.0.dist-info → azure_ai_evaluation-1.5.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# Original source:
|
|
6
|
+
# - promptflow-core/promptflow/_core/log_manager.py
|
|
7
|
+
# - promptflow-core/promptflow/_utils/logger_utils.py
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import logging
|
|
11
|
+
import re
|
|
12
|
+
import sys
|
|
13
|
+
from re import Pattern
|
|
14
|
+
from contextvars import ContextVar
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from io import StringIO, TextIOBase
|
|
18
|
+
from typing import Any, Dict, Final, Mapping, Optional, Set, TextIO, Tuple, Union
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Level names accepted by the standard ``logging`` module.
valid_logging_level: Final[Set[str]] = {"CRITICAL", "FATAL", "ERROR", "WARN", "WARNING", "INFO", "DEBUG", "NOTSET"}


def get_pf_logging_level(default=logging.INFO):
    """Return the logging level configured through the ``PF_LOGGING_LEVEL``
    environment variable.

    :param default: value returned when the variable is unset or is not a
        recognized level name (defaults to ``logging.INFO``).
    :return: the configured level name string, or *default* on fallback.
    """
    configured = os.environ.get("PF_LOGGING_LEVEL")
    # Fall back to the default if the user's input is missing or invalid.
    return configured if configured in valid_logging_level else default
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _get_format_for_logger(
    default_log_format: Optional[str] = None, default_date_format: Optional[str] = None
) -> Tuple[str, str]:
    """Resolve the message format and date format to use for a logger.

    Each value is resolved in order: the ``PF_LOG_FORMAT`` /
    ``PF_LOG_DATETIME_FORMAT`` environment variable, then the supplied
    default, then a built-in fallback format.

    :param default_log_format: optional caller-supplied message format.
    :param default_date_format: optional caller-supplied date format.
    :return: a ``(log_format, datetime_format)`` tuple.
    """
    fallback_format = "%(asctime)s %(thread)7d %(name)-18s %(levelname)-8s %(message)s"
    fallback_datefmt = "%Y-%m-%d %H:%M:%S %z"
    log_format = os.environ.get("PF_LOG_FORMAT") or default_log_format or fallback_format
    datetime_format = os.environ.get("PF_LOG_DATETIME_FORMAT") or default_date_format or fallback_datefmt
    return log_format, datetime_format
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_logger(name: str) -> logging.Logger:
    """Create a logger used during execution that writes to stdout.

    NOTE: this deliberately instantiates ``logging.Logger`` directly rather
    than calling ``logging.getLogger``, so the logger is not registered in
    the global logger hierarchy.
    """
    execution_logger = logging.Logger(name)
    execution_logger.setLevel(get_pf_logging_level())
    stream_handler = logging.StreamHandler(sys.stdout)
    fmt, datefmt = _get_format_for_logger()
    # TODO ralphe: Do we need a credentials scrubber here like the old code
    # had? We are not logging anything that sensitive here.
    stream_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt=datefmt))
    execution_logger.addHandler(stream_handler)
    return execution_logger
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def scrub_credentials(s: str):
    """Return *s* with credential-like substrings masked.

    For example, ``"print accountkey=accountKey"`` becomes
    ``"print accountkey=**data_scrubbed**"``.
    """
    # Delegates to the default scrubber; no handler-specific scrubbers are
    # consulted in this port.
    return CredentialScrubber.scrub(s)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class CredentialScrubber:
    """Masks sensitive substrings (signatures, keys) in strings."""

    PLACE_HOLDER = "**data_scrubbed**"
    LENGTH_THRESHOLD = 2
    # Patterns whose matches are replaced with PLACE_HOLDER. Lookbehinds keep
    # the "sig="/"key=" prefix intact and mask only the secret value.
    DEFAULT_REGEX_SET: Final[Set[Pattern[str]]] = {
        re.compile(r"(?<=sig=)[^\s;&]+", flags=re.IGNORECASE),  # Replace signature.
        re.compile(r"(?<=key=)[^\s;&]+", flags=re.IGNORECASE),  # Replace key.
    }

    @staticmethod
    def scrub(input: str) -> str:
        """Replace sensitive information in *input* with PLACE_HOLDER.

        For example, for input string: "print accountkey=accountKey", the
        output will be: "print accountkey=**data_scrubbed**"
        """
        scrubbed = input
        for pattern in CredentialScrubber.DEFAULT_REGEX_SET:
            scrubbed = pattern.sub(CredentialScrubber.PLACE_HOLDER, scrubbed)
        return scrubbed
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Logs by flow_logger will only be shown in flow mode.
# These logs should contain all detailed logs from executor and runtime.
flow_logger = get_logger("execution.flow")

# Logs by bulk_logger will only be shown in bulktest and eval modes.
# These logs should contain overall progress logs and error logs.
bulk_logger = get_logger("execution.bulk")

# Logs by logger will be shown in all the modes above,
# such as error logs.
logger = get_logger("execution")
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def log_progress(
    run_start_time: datetime,
    total_count: int,
    current_count: int,
    logger: logging.Logger = bulk_logger,
    formatter="Finished {count} / {total_count} lines.",
) -> None:
    """Log batch progress plus average and estimated remaining execution time.

    Nothing is logged until at least one line has completed
    (``current_count > 0``).

    :param run_start_time: when the run began (timezone-aware expected).
    :param total_count: total number of lines in the batch.
    :param current_count: number of lines completed so far.
    :param logger: destination logger (defaults to ``bulk_logger``).
    :param formatter: progress-message template with ``count``/``total_count``.
    """
    if current_count <= 0:
        return
    elapsed_seconds = datetime.now(timezone.utc).timestamp() - run_start_time.timestamp()
    average_execution_time = round(elapsed_seconds / current_count, 2)
    estimated_execution_time = round(average_execution_time * (total_count - current_count), 2)
    logger.info(formatter.format(count=current_count, total_count=total_count))
    logger.info(
        f"Average execution time for completed lines: {average_execution_time} seconds. "
        f"Estimated time for incomplete lines: {estimated_execution_time} seconds."
    )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def incremental_print(log: str, printed: int, fileout: Union[TextIO, Any]) -> int:
    """Write only the not-yet-printed lines of *log* to *fileout*.

    :param log: the full accumulated log text.
    :param printed: how many leading lines were already emitted by prior calls.
    :param fileout: destination stream (anything with a ``write`` method).
    :return: the updated count of emitted lines.
    """
    for index, line in enumerate(log.splitlines()):
        # Skip lines already printed on a previous call.
        if index >= printed:
            fileout.write(line + "\n")
            printed += 1
    return printed
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def print_red_error(message):
    """Print *message* in red when colorama is available, otherwise plainly."""
    try:
        # colorama is an optional dependency; degrade gracefully without it.
        from colorama import Fore, init
    except ImportError:
        print(message)
    else:
        init(autoreset=True)
        print(Fore.RED + message)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@dataclass
class NodeInfo:
    """Identity of a single node execution: which run, which node, which line."""

    # ID of the run this node execution belongs to.
    run_id: str
    # Name of the node being executed.
    node_name: str
    # Index of the input line being processed.
    line_number: int
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class NodeLogManager:
    """Replace sys.stdout and sys.stderr with NodeLogWriter.

    This class intercepts and saves logs to stdout/stderr when executing a node. For example:
    with NodeLogManager() as log_manager:
        print('test stdout')
        print('test stderr', file=sys.stderr)

    log_manager.get_logs() will return: {'stdout': 'test stdout\n', 'stderr': 'test stderr\n'}
    """

    def __init__(self, record_datetime: bool = True):
        # Writers are created eagerly but only installed in __enter__.
        self.stdout_logger = NodeLogWriter(sys.stdout, record_datetime)
        self.stderr_logger = NodeLogWriter(sys.stderr, record_datetime, is_stderr=True)

    def __enter__(self) -> "NodeLogManager":
        """Install the interceptors, remembering the previous streams."""
        self._prev_stdout, self._prev_stderr = sys.stdout, sys.stderr
        sys.stdout, sys.stderr = self.stdout_logger, self.stderr_logger
        return self

    def __exit__(self, *args) -> None:
        """Restore the original sys.stdout and sys.stderr."""
        sys.stdout, sys.stderr = self._prev_stdout, self._prev_stderr

    def set_node_context(self, run_id: str, node_name: str, line_number: int) -> None:
        """Route subsequent writes on both streams to the given node context."""
        for writer in (self.stdout_logger, self.stderr_logger):
            writer.set_node_info(run_id, node_name, line_number)

    def clear_node_context(self, run_id: str) -> None:
        """Stop routing writes for *run_id* and drop its buffers."""
        for writer in (self.stdout_logger, self.stderr_logger):
            writer.clear_node_info(run_id)

    def get_logs(self, run_id: str) -> Mapping[str, str]:
        """Return the captured stdout/stderr text for *run_id*."""
        return {
            "stdout": self.stdout_logger.get_log(run_id),
            "stderr": self.stderr_logger.get_log(run_id),
        }
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class NodeLogWriter(TextIOBase):
    """Record node run logs.

    A stand-in for sys.stdout / sys.stderr that routes each write to a
    per-run ``StringIO`` buffer (selected via a ``ContextVar`` holding the
    current :class:`NodeInfo`) and mirrors it to the flow/bulk loggers.
    When no node context is set, writes fall through to the wrapped stream.
    """

    # Timestamp prefix format applied when record_datetime is enabled.
    DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"

    def __init__(self, prev_stdout: Union[TextIOBase, Any], record_datetime: bool = True, is_stderr: bool = False):
        # Per-run capture buffers, keyed by run id.
        self.run_id_to_stdout: Dict[str, StringIO] = {}
        # Context-local pointer to the node currently writing; None means
        # "no node context" and writes pass through to prev_stdout.
        self._context: ContextVar[Optional[NodeInfo]] = ContextVar("run_log_info", default=None)
        self._prev_out: Union[TextIOBase, Any] = prev_stdout
        self._record_datetime: bool = record_datetime
        self._is_stderr: bool = is_stderr

    def set_node_info(self, run_id: str, node_name: str, line_number: int) -> None:
        """Set node info to a context variable.

        After set node info, write method will write to string IO associated with this node.
        """
        run_log_info = NodeInfo(run_id, node_name, line_number)
        self._context.set(run_log_info)
        self.run_id_to_stdout.update({run_id: StringIO()})

    def clear_node_info(self, run_id: str):
        """Clear context variable associated with run id."""
        # Only reset the context if it still points at this run; another run
        # may already have taken over the context.
        log_info: Optional[NodeInfo] = self._context.get()
        if log_info and log_info.run_id == run_id:
            self._context.set(None)

        if run_id in self.run_id_to_stdout:
            self.run_id_to_stdout.pop(run_id)

    def get_log(self, run_id: str) -> str:
        """Get log associated with run id; empty string when none captured."""
        string_io: Optional[StringIO] = self.run_id_to_stdout.get(run_id)
        if string_io is None:
            return ""

        return string_io.getvalue()

    def write(self, s: str) -> int:
        """Override TextIO's write method and writes input string into a string IO

        The written string is scrubbed of credentials first.
        The string is also recorded to flow/bulk logger.
        If node info is not set, write to previous stdout.
        """
        log_info: Optional[NodeInfo] = self._context.get()
        s = scrub_credentials(s)  # Remove credential from string.
        if log_info is None:
            return self._prev_out.write(s)
        else:
            self._write_to_flow_log(log_info, s)
            stdout: Optional[StringIO] = self.run_id_to_stdout.get(log_info.run_id)
            # When the line execution timeout is reached, all running nodes will be cancelled and node info will
            # be cleared. This will remove StringIO from self.run_id_to_stdout. For sync tools running in a worker
            # thread, they can't be stopped and self._context won't change in the worker
            # thread because it's a thread-local variable. Therefore, we need to check if StringIO is None here.
            if stdout is None:
                return 0
            if self._record_datetime and s != "\n":  # For line breaker, do not add datetime prefix.
                s = f"[{datetime.now(timezone.utc).strftime(self.DATETIME_FORMAT)}] {s}"
            return stdout.write(s)

    def flush(self):
        """Override TextIO's flush method."""
        node_info: Optional[NodeInfo] = self._context.get()
        if node_info is None:
            self._prev_out.flush()
        else:
            string_io = self.run_id_to_stdout.get(node_info.run_id)
            if string_io is not None:
                string_io.flush()

    def _write_to_flow_log(self, log_info: NodeInfo, s: str):
        """Save stdout log to flow_logger and stderr log to logger."""
        # If user uses "print('log message.')" to log, then
        # "write" method will be called twice and the second time input is only '\n'.
        # For this case, should not log '\n' in flow_logger.
        if s != "\n":
            if self._is_stderr:
                flow_log = f"[{str(log_info)}] stderr> " + s.rstrip("\n")
                # Log stderr in all scenarios so we can diagnose problems.
                logger.warning(flow_log)
            else:
                flow_log = f"[{str(log_info)}] stdout> " + s.rstrip("\n")
                # Log stdout only in flow mode.
                flow_logger.info(flow_log)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# Original source code: promptflow-tracing/promptflow/tracing/_integrations/_openai_injector.py
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def inject_openai_api():
    """This function:
    1. Modifies the create methods of the OpenAI API classes to inject logic before calling the original methods.
       It stores the original methods as _original attributes of the create methods.
    2. Updates the openai api configs from environment variables.

    NOTE: currently a no-op placeholder; the promptflow implementation has
    not been ported yet.
    """
    # TODO ralphe: Port function?
    pass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def recover_openai_api():
    """This function restores the original create methods of the OpenAI API classes
    by assigning them back from the _original attributes of the modified methods.

    NOTE: currently a no-op placeholder; the promptflow implementation has
    not been ported yet.
    """
    # TODO ralphe: Port function?
    pass
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
from typing import Any, Mapping, Optional, Sequence
|
|
8
|
+
|
|
9
|
+
from ._status import BatchStatus
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class TokenMetrics:
    """The token metrics of a run."""

    prompt_tokens: int
    """The number of tokens used in the prompt for the run."""
    completion_tokens: int
    """The number of tokens used in the completion for the run."""
    total_tokens: int
    """The total number of tokens used in the run."""
    # NOTE: total_tokens is stored, not derived — callers must keep it in
    # sync with prompt_tokens + completion_tokens when updating.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class BatchRunError:
    """The error of a batch run."""

    details: str
    """The details of the error."""
    exception: Optional[BaseException]
    """The exception of the error. May be None when no exception object is available."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class BatchRunDetails:
    """The result and status of a single line in a batch run."""

    id: str
    """The ID of the line run."""
    status: BatchStatus
    """The status of the line run."""
    result: Optional[Mapping[str, Any]]
    """The result of the line run."""
    start_time: Optional[datetime]
    """The start time of the line run. If this was never started, this should be None."""
    end_time: Optional[datetime]
    """The end time of the line run. If this never completed, this should be None."""
    tokens: TokenMetrics
    """The token metrics of the line run."""
    error: Optional[BatchRunError]
    """The error of the line run. This will only be set if the status is Failed."""

    @property
    def duration(self) -> timedelta:
        """Elapsed time of the line run; zero when it never started or never finished."""
        if self.start_time is None or self.end_time is None:
            return timedelta(0)
        return self.end_time - self.start_time

    @staticmethod
    def create_id(run_id: str, index: int) -> str:
        """Build the canonical line-run ID from the run ID and line index."""
        return "_".join((run_id, str(index)))
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class BatchResult:
    """The aggregate result of a batch run."""

    status: BatchStatus
    """The overall status of the batch run."""
    total_lines: int
    """The total number of lines in the batch run."""
    failed_lines: int
    """The number of failed lines in the batch run."""
    start_time: datetime
    """The start time of the batch run."""
    end_time: datetime
    """The end time of the batch run."""
    tokens: TokenMetrics
    """The overall token metrics of the batch run."""
    details: Sequence[BatchRunDetails]
    """The details of each line in the batch run."""
    error: Optional[Exception] = None
    """The error of the batch run. This will only be set if the status does not indicate success."""

    @property
    def duration(self) -> timedelta:
        """Elapsed time of the batch run; zero when either endpoint is missing."""
        if self.start_time is None or self.end_time is None:
            return timedelta(0)
        return self.end_time - self.start_time

    @property
    def results(self) -> Sequence[Optional[Mapping[str, Any]]]:
        """The per-line results, in input order; empty when there are no details."""
        return [entry.result for entry in self.details] if self.details else []
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
from datetime import datetime, timedelta, timezone
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any, Callable, Mapping, Optional, Sequence
|
|
9
|
+
|
|
10
|
+
from ._utils import normalize_identifier_name
|
|
11
|
+
from ._result import BatchResult
|
|
12
|
+
from ._status import BatchStatus
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RunStatus(Enum):
    """Execution status of a legacy run.

    TODO ralphe: Trim this to just the statuses we need (several of the
    original Promptflow statuses — Queued, Provisioning, Starting,
    CancelRequested, Finalizing, Unapproved, NotResponding, Pausing,
    Paused — were dropped in this port).
    """

    NOT_STARTED = "NotStarted"
    PREPARING = "Preparing"
    RUNNING = "Running"
    CANCELED = "Canceled"
    COMPLETED = "Completed"
    FAILED = "Failed"

    @staticmethod
    def from_batch_result_status(status: BatchStatus) -> "RunStatus":
        """Map a BatchStatus onto the closest RunStatus.

        Unknown statuses conservatively map to FAILED.
        """
        translation = {
            BatchStatus.NotStarted: RunStatus.NOT_STARTED,
            BatchStatus.Running: RunStatus.RUNNING,
            BatchStatus.Completed: RunStatus.COMPLETED,
            BatchStatus.Canceled: RunStatus.CANCELED,
            BatchStatus.Failed: RunStatus.FAILED,
        }
        return translation.get(status, RunStatus.FAILED)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Run:
|
|
50
|
+
"""The equivalent of a Promptflow Run
|
|
51
|
+
promptflow-devkit/promptflow/_sdk/entities/_run.py
|
|
52
|
+
|
|
53
|
+
THIS WILL BE REMOVED IN A FUTURE CODE UPDATE"""
|
|
54
|
+
|
|
55
|
+
def __init__(
|
|
56
|
+
self,
|
|
57
|
+
*,
|
|
58
|
+
dynamic_callable: Callable,
|
|
59
|
+
name_prefix: Optional[str],
|
|
60
|
+
inputs: Sequence[Mapping[str, Any]],
|
|
61
|
+
column_mapping: Mapping[str, str],
|
|
62
|
+
created_on: Optional[datetime] = None,
|
|
63
|
+
):
|
|
64
|
+
self._status: RunStatus = RunStatus.NOT_STARTED
|
|
65
|
+
self._created_on = created_on or datetime.now(timezone.utc)
|
|
66
|
+
self._start_time: Optional[datetime] = None
|
|
67
|
+
self._end_time: Optional[datetime] = None
|
|
68
|
+
|
|
69
|
+
self.dynamic_callable = dynamic_callable
|
|
70
|
+
self.name = self._generate_run_name(name_prefix, self._created_on)
|
|
71
|
+
self.inputs = inputs
|
|
72
|
+
self.column_mapping = column_mapping
|
|
73
|
+
self.result: Optional[BatchResult] = None
|
|
74
|
+
self.metrics: Mapping[str, Any] = {}
|
|
75
|
+
|
|
76
|
+
# self._use_remote_flow = False
|
|
77
|
+
# self._from_flex_flow = True
|
|
78
|
+
# self._from_prompty = False
|
|
79
|
+
# self.flow = path to pointless flow file
|
|
80
|
+
# self._experiment_name = name of folder containing pointless flow file
|
|
81
|
+
# self._lineage_id = basically equivalent to a hex digest of the SHA256 hash of:
|
|
82
|
+
# f"{uuid.getnod()}/{posix_full_path_to_pointless_folder}"
|
|
83
|
+
# self._output_path = Path("<user_folder>/.promptflow/runs/<self.name>")
|
|
84
|
+
# self._flow_name = name of pointless folder
|
|
85
|
+
|
|
86
|
+
@property
|
|
87
|
+
def status(self) -> RunStatus:
|
|
88
|
+
return self._status
|
|
89
|
+
|
|
90
|
+
@property
|
|
91
|
+
def created_on(self) -> datetime:
|
|
92
|
+
return self._created_on
|
|
93
|
+
|
|
94
|
+
@property
|
|
95
|
+
def duration(self) -> Optional[timedelta]:
|
|
96
|
+
if self._start_time is None or self._end_time is None:
|
|
97
|
+
return None
|
|
98
|
+
|
|
99
|
+
return self._end_time - self._start_time
|
|
100
|
+
|
|
101
|
+
@property
def outputs(self) -> Sequence[Mapping[str, Any]]:
    """Per-row outputs of the run.

    Rows with a falsy result (e.g. a failed row) are reported as empty
    mappings. While no result is available at all, an empty list is returned.

    :return: One mapping per input row.
    :rtype: Sequence[Mapping[str, Any]]
    """
    if self.result is None:
        return []
    rows = []
    for value in self.result.results:
        rows.append(value if value else {})
    return rows
|
|
107
|
+
|
|
108
|
+
@staticmethod
def _generate_run_name(name_prefix: Optional[str], creation_time: datetime) -> str:
    """Build a unique, normalized run name from a prefix and a timestamp.

    Promptflow derived its run names from the temporary folder holding its
    generated flow YAML (e.g.
    "azure_ai_evaluation_evaluators_common_base_eval_asyncevaluatorbase_l82059h3").
    Here the caller-supplied prefix is used instead, falling back to a UUID —
    which is every bit as opaque as the original scheme.

    :param Optional[str] name_prefix: Desired prefix; a UUID is used when falsy.
    :param datetime creation_time: Timestamp embedded in the name.
    :return: The normalized run name.
    :rtype: str
    """
    prefix = name_prefix if name_prefix else str(uuid4())
    suffix = creation_time.strftime("%Y%m%d_%H%M%S_%f")
    return normalize_identifier_name(f"{prefix}_{suffix}")
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# ---------------------------------------------------------
|
|
2
|
+
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
3
|
+
# ---------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
# Original source:
|
|
6
|
+
# promptflow-devkit/promptflow/_sdk/operations/_local_storage_operations.py
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from contextlib import AbstractContextManager
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Final, Mapping, Optional, Tuple, Union
|
|
13
|
+
|
|
14
|
+
from ._result import BatchResult, TokenMetrics, BatchStatus
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Name of the per-user subfolder (under the home directory) used for evaluation artifacts.
EVAL_USER_SUBFOLDER: Final[str] = ".evaluation"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AbstractRunLogger(AbstractContextManager):
|
|
21
|
+
@property
|
|
22
|
+
@abstractmethod
|
|
23
|
+
def file_path(self) -> Path:
|
|
24
|
+
"""Get the file path of the logger.
|
|
25
|
+
|
|
26
|
+
:return: The file path of the logger.
|
|
27
|
+
:rtype: Path
|
|
28
|
+
"""
|
|
29
|
+
...
|
|
30
|
+
|
|
31
|
+
@abstractmethod
|
|
32
|
+
def get_logs(self) -> str:
|
|
33
|
+
"""Get the logs of the run.
|
|
34
|
+
|
|
35
|
+
:return: The logs of the run.
|
|
36
|
+
:rtype: str
|
|
37
|
+
"""
|
|
38
|
+
...
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class AbstractRunStorage(ABC):
    """Interface for persisting and reloading the artifacts of a batch run."""

    @property
    @abstractmethod
    def logger(self) -> "AbstractRunLogger":
        """Get the logger of the run.

        :return: The logger of the run.
        :rtype: AbstractRunLogger
        """
        ...

    @abstractmethod
    def persist_result(self, result: Optional[BatchResult]) -> None:
        """Persist results of a batch engine execution (including any errors).

        :param Optional[BatchResult] result: The result to persist.
        """
        ...

    @abstractmethod
    def load_exception(self) -> Mapping[str, Any]:
        """Load the exception from the storage. If there was no exception, an empty
        mapping will be returned.

        :return: The exception details (empty mapping when no exception occurred).
        :rtype: Mapping[str, Any]
        """
        ...

    @abstractmethod
    def load_inputs_and_outputs(self) -> Tuple[Mapping[str, Any], BatchResult]:
        """Load the inputs and outputs from the storage.

        :return: The inputs and outputs.
        :rtype: Tuple[Mapping[str, Any], BatchResult]
        """
        ...

    @abstractmethod
    def load_metrics(self) -> Mapping[str, Union[int, float, str]]:
        """Load the metrics from the storage.

        :return: The metrics.
        :rtype: Mapping[str, Union[int, float, str]]
        """
        ...
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class NoOpRunStorage(AbstractRunStorage):
    """Run storage that discards everything it is given."""

    def __init__(self):
        self._logger = NoOpLogger()

    @property
    def logger(self) -> AbstractRunLogger:
        """The no-op logger associated with this storage.

        :rtype: AbstractRunLogger
        """
        return self._logger

    def persist_result(self, result: Optional[BatchResult]) -> None:
        """Discard the result without persisting anything.

        :param Optional[BatchResult] result: Ignored.
        """

    def load_exception(self) -> Mapping[str, Any]:
        """Return an empty mapping; nothing is ever stored.

        :rtype: Mapping[str, Any]
        """
        return {}

    def load_inputs_and_outputs(self) -> Tuple[Mapping[str, Any], BatchResult]:
        """Return empty inputs and a not-started, zero-valued batch result.

        :rtype: Tuple[Mapping[str, Any], BatchResult]
        """
        timestamp = datetime.now(timezone.utc)
        empty_result = BatchResult(
            BatchStatus.NotStarted, 0, 0, timestamp, timestamp, TokenMetrics(0, 0, 0), []
        )
        return {}, empty_result

    def load_metrics(self) -> Mapping[str, Union[int, float, str]]:
        """Return an empty metrics mapping.

        :rtype: Mapping[str, Union[int, float, str]]
        """
        return {}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class NoOpLogger(AbstractRunLogger):
    """Run logger that records nothing."""

    @property
    def file_path(self) -> Path:
        """Nominal log location under the user's home evaluation folder.

        Nothing is ever written there by this logger.

        :rtype: Path
        """
        return Path.home() / EVAL_USER_SUBFOLDER

    def __enter__(self) -> None:
        """Entering the context does nothing."""

    def __exit__(self, *args) -> None:
        """Exiting the context does nothing and does not suppress exceptions."""

    def get_logs(self) -> str:
        """Return an empty log string.

        :rtype: str
        """
        return ""
|